1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping rmap; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 51 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 52 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 53 for (i=0;i<maxneighs;i++) { 54 PetscInt first,last; 55 56 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 57 /* the near-null space fo BDDC carries information on quadrature weights, 58 and these can be collinear -> so cheat with MatNullSpaceCreate 59 and create a suitable set of basis vectors first */ 60 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 61 if (i>=first && i < last) { 62 PetscScalar *data; 63 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 64 data[i-first] = 1.; 65 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 66 } 67 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 68 } 69 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 70 for (i=0;i<maxneighs;i++) { /* reset vectors */ 71 PetscInt first,last; 72 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 73 if (i>=first && i < last) { 74 PetscScalar *data; 75 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 76 data[i-first] = 0.; 77 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 78 } 79 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 80 } 81 /* compute local quad vec */ 82 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 83 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 84 ierr = VecSet(p,1.);CHKERRQ(ierr); 85 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 86 if (vl2l) { 87 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 88 } else { 89 vins = v; 90 } 91 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 92 ierr = VecDestroy(&p);CHKERRQ(ierr); 93 94 /* insert in global quadrature vecs */ 95 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 96 for (i=0;i<nf;i++) { 97 const PetscInt *idxs; 98 PetscInt idx,nn,j; 99 100 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 101 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 102 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 103 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 104 idx = -(idx+1); 105 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 106 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 107 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 108 } 109 for (i=0;i<ne;i++) { 110 const PetscInt *idxs; 111 PetscInt idx,nn,j; 112 113 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 114 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 115 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 116 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 117 idx = -(idx+1); 118 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 119 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 120 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 121 } 122 ierr = PetscFree(edges);CHKERRQ(ierr); 123 ierr = PetscFree(faces);CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 125 if (vl2l) { 126 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 127 } 128 ierr = VecDestroy(&v);CHKERRQ(ierr); 129 ierr = PetscFree(vals);CHKERRQ(ierr); 130 131 /* assemble near null space */ 132 for (i=0;i<maxneighs;i++) { 133 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 134 } 135 for (i=0;i<maxneighs;i++) { 136 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 137 } 138 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 145 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 146 { 147 PetscErrorCode ierr; 148 Vec local,global; 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 151 152 PetscFunctionBegin; 153 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 154 /* need to convert from global to local topology information and remove references to information in global ordering */ 155 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 156 if (pcbddc->user_provided_isfordofs) { 157 if (pcbddc->n_ISForDofs) { 158 PetscInt i; 159 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->n_ISForDofs;i++) { 161 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 162 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 163 } 164 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 165 pcbddc->n_ISForDofs = 0; 166 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 167 } 168 } else { 169 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 170 PetscInt i, n = matis->A->rmap->n; 171 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 172 if (i > 1) { 173 pcbddc->n_ISForDofsLocal = i; 174 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 175 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 176 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 177 } 178 } 179 } 180 } 181 182 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 183 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 184 } 185 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 186 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 187 } 188 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 189 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 190 } 191 ierr = VecDestroy(&global);CHKERRQ(ierr); 192 ierr = VecDestroy(&local);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 198 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 199 { 200 PC_IS *pcis = (PC_IS*)(pc->data); 201 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!pcbddc->benign_have_null) { 206 PetscFunctionReturn(0); 207 } 208 if (pcbddc->ChangeOfBasisMatrix) { 209 Vec swap; 210 211 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 212 swap = pcbddc->work_change; 213 pcbddc->work_change = r; 214 r = swap; 215 } 216 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 217 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 218 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 219 ierr = VecSet(z,0.);CHKERRQ(ierr); 220 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 221 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 222 if (pcbddc->ChangeOfBasisMatrix) { 223 Vec swap; 224 225 swap = r; 226 r = pcbddc->work_change; 227 pcbddc->work_change = swap; 228 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 229 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 230 } 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 236 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 237 { 238 PCBDDCBenignMatMult_ctx ctx; 239 PetscErrorCode ierr; 240 PetscBool apply_right,apply_left,reset_x; 241 242 PetscFunctionBegin; 243 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 244 if (transpose) { 245 apply_right = ctx->apply_left; 246 apply_left = ctx->apply_right; 247 } else { 248 apply_right = ctx->apply_right; 249 apply_left = ctx->apply_left; 250 } 251 reset_x = PETSC_FALSE; 252 if (apply_right) { 253 const PetscScalar *ax; 254 PetscInt nl,i; 255 256 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 257 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 258 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 259 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 260 for (i=0;i<ctx->benign_n;i++) { 261 PetscScalar sum,val; 262 const PetscInt *idxs; 263 PetscInt nz,j; 264 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 265 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 266 sum = 0.; 267 if (ctx->apply_p0) { 268 val = ctx->work[idxs[nz-1]]; 269 for (j=0;j<nz-1;j++) { 270 sum += ctx->work[idxs[j]]; 271 ctx->work[idxs[j]] += val; 272 } 273 } else { 274 for (j=0;j<nz-1;j++) { 275 sum += ctx->work[idxs[j]]; 276 } 277 } 278 ctx->work[idxs[nz-1]] -= sum; 279 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 280 } 281 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 282 reset_x = PETSC_TRUE; 283 } 284 if (transpose) { 285 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 286 } else { 287 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 288 } 289 if (reset_x) { 290 ierr = VecResetArray(x);CHKERRQ(ierr); 291 } 292 if (apply_left) { 293 PetscScalar *ay; 294 PetscInt i; 295 296 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 297 for (i=0;i<ctx->benign_n;i++) { 298 PetscScalar sum,val; 299 const PetscInt *idxs; 300 PetscInt nz,j; 301 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 302 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 303 val = -ay[idxs[nz-1]]; 304 if (ctx->apply_p0) { 305 sum = 0.; 306 for (j=0;j<nz-1;j++) { 307 sum += ay[idxs[j]]; 308 ay[idxs[j]] += val; 309 } 310 ay[idxs[nz-1]] += sum; 311 } else { 312 for (j=0;j<nz-1;j++) { 313 ay[idxs[j]] += val; 314 } 315 ay[idxs[nz-1]] = 0.; 316 } 317 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 318 } 319 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 326 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 327 { 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 337 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 338 { 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCBenignShellMat" 348 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 349 { 350 PC_IS *pcis = (PC_IS*)pc->data; 351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 352 PCBDDCBenignMatMult_ctx ctx; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 if (!restore) { 357 Mat A_IB,A_BI; 358 PetscScalar *work; 359 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 360 361 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 362 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 363 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 364 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 365 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 366 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 367 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 368 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 369 ierr = PetscNew(&ctx);CHKERRQ(ierr); 370 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 371 ctx->apply_left = PETSC_TRUE; 372 ctx->apply_right = PETSC_FALSE; 373 ctx->apply_p0 = PETSC_FALSE; 374 ctx->benign_n = pcbddc->benign_n; 375 if (reuse) { 376 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 377 ctx->free = PETSC_FALSE; 378 } else { /* TODO: could be optimized for successive solves */ 379 ISLocalToGlobalMapping N_to_D; 380 PetscInt i; 381 382 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 383 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 384 for (i=0;i<pcbddc->benign_n;i++) { 385 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 386 } 387 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 388 ctx->free = PETSC_TRUE; 389 } 390 ctx->A = pcis->A_IB; 391 ctx->work = work; 392 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 393 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 pcis->A_IB = A_IB; 396 397 /* A_BI as A_IB^T */ 398 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 399 pcbddc->benign_original_mat = pcis->A_BI; 400 pcis->A_BI = A_BI; 401 } else { 402 if (!pcbddc->benign_original_mat) { 403 PetscFunctionReturn(0); 404 } 405 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 407 pcis->A_IB = ctx->A; 408 ctx->A = NULL; 409 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 410 pcis->A_BI = pcbddc->benign_original_mat; 411 pcbddc->benign_original_mat = NULL; 412 if (ctx->free) { 413 PetscInt i; 414 for (i=0;i<ctx->benign_n;i++) { 415 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 416 } 417 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 418 } 419 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 420 ierr = PetscFree(ctx);CHKERRQ(ierr); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 /* used just in bddc debug mode */ 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCBenignProject" 428 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 432 Mat An; 433 PetscErrorCode ierr; 434 435 PetscFunctionBegin; 436 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 437 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 438 if (is1) { 439 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 440 ierr = MatDestroy(&An);CHKERRQ(ierr); 441 } else { 442 *B = An; 443 } 444 PetscFunctionReturn(0); 445 } 446 447 /* TODO: add reuse flag */ 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatSeqAIJCompress" 450 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 451 { 452 Mat Bt; 453 PetscScalar *a,*bdata; 454 const PetscInt *ii,*ij; 455 PetscInt m,n,i,nnz,*bii,*bij; 456 PetscBool flg_row; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 461 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 462 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 463 nnz = n; 464 for (i=0;i<ii[n];i++) { 465 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 466 } 467 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 468 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 469 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 470 nnz = 0; 471 bii[0] = 0; 472 for (i=0;i<n;i++) { 473 PetscInt j; 474 for (j=ii[i];j<ii[i+1];j++) { 475 PetscScalar entry = a[j]; 476 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 477 bij[nnz] = ij[j]; 478 bdata[nnz] = entry; 479 nnz++; 480 } 481 } 482 bii[i+1] = nnz; 483 } 484 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 485 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 486 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 487 { 488 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 489 b->free_a = PETSC_TRUE; 490 b->free_ij = PETSC_TRUE; 491 } 492 *B = Bt; 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatDetectDisconnectedComponents" 498 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 499 { 500 Mat B; 501 IS is_dummy,*cc_n; 502 ISLocalToGlobalMapping l2gmap_dummy; 503 PCBDDCGraph graph; 504 PetscInt i,n; 505 PetscInt *xadj,*adjncy; 506 PetscInt *xadj_filtered,*adjncy_filtered; 507 PetscBool flg_row,isseqaij; 508 PetscErrorCode ierr; 509 510 PetscFunctionBegin; 511 if (!A->rmap->N || !A->cmap->N) { 512 *ncc = 0; 513 *cc = NULL; 514 PetscFunctionReturn(0); 515 } 516 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 517 if (!isseqaij && filter) { 518 PetscBool isseqdense; 519 520 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 521 if (!isseqdense) { 522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 523 } else { /* TODO: rectangular case and LDA */ 524 PetscScalar *array; 525 PetscReal chop=1.e-6; 526 527 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 528 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 529 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 530 for (i=0;i<n;i++) { 531 PetscInt j; 532 for (j=i+1;j<n;j++) { 533 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 534 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 535 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 536 } 537 } 538 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 539 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 540 } 541 } else { 542 B = A; 543 } 544 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 545 546 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 547 if (filter) { 548 PetscScalar *data; 549 PetscInt j,cum; 550 551 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 552 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 553 cum = 0; 554 for (i=0;i<n;i++) { 555 PetscInt t; 556 557 for (j=xadj[i];j<xadj[i+1];j++) { 558 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 559 continue; 560 } 561 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 562 } 563 t = xadj_filtered[i]; 564 xadj_filtered[i] = cum; 565 cum += t; 566 } 567 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 568 } else { 569 xadj_filtered = NULL; 570 adjncy_filtered = NULL; 571 } 572 573 /* compute local connected components using PCBDDCGraph */ 574 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 575 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 576 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 577 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 578 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 579 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 580 if (xadj_filtered) { 581 graph->xadj = xadj_filtered; 582 graph->adjncy = adjncy_filtered; 583 } else { 584 graph->xadj = xadj; 585 graph->adjncy = adjncy; 586 } 587 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 588 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 589 /* partial clean up */ 590 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 591 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 592 if (A != B) { 593 ierr = MatDestroy(&B);CHKERRQ(ierr); 594 } 595 596 /* get back data */ 597 if (ncc) *ncc = graph->ncc; 598 if (cc) { 599 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 600 for (i=0;i<graph->ncc;i++) { 601 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 602 } 603 *cc = cc_n; 604 } 605 /* clean up graph */ 606 graph->xadj = 0; 607 graph->adjncy = 0; 608 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "PCBDDCBenignCheck" 614 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 615 { 616 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 617 PC_IS* pcis = (PC_IS*)(pc->data); 618 IS dirIS = NULL; 619 PetscInt i; 620 PetscErrorCode ierr; 621 622 PetscFunctionBegin; 623 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 624 if (zerodiag) { 625 Mat A; 626 Vec vec3_N; 627 PetscScalar *vals; 628 const PetscInt *idxs; 629 PetscInt nz,*count; 630 631 /* p0 */ 632 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 633 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 634 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 635 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 636 for (i=0;i<nz;i++) vals[i] = 1.; 637 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 638 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 639 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 640 /* v_I */ 641 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 642 for (i=0;i<nz;i++) vals[i] = 0.; 643 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 644 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 645 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 646 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 647 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 649 if (dirIS) { 650 PetscInt n; 651 652 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 653 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 654 for (i=0;i<n;i++) vals[i] = 0.; 655 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 657 } 658 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 659 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 660 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 661 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 662 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 663 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 664 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 665 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 666 ierr = PetscFree(vals);CHKERRQ(ierr); 667 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 668 669 /* there should not be any pressure dofs lying on the interface */ 670 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 671 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 672 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 673 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 674 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 675 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 676 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 677 ierr = PetscFree(count);CHKERRQ(ierr); 678 } 679 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 680 681 /* check PCBDDCBenignGetOrSetP0 */ 682 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 683 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 684 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 685 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 686 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 687 for (i=0;i<pcbddc->benign_n;i++) if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 693 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 694 { 695 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 696 IS pressures,zerodiag,*zerodiag_subs; 697 PetscInt nz,n; 698 PetscInt *interior_dofs,n_interior_dofs; 699 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 705 for (n=0;n<pcbddc->benign_n;n++) { 706 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 707 } 708 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 709 pcbddc->benign_n = 0; 710 /* if a local info on dofs is present, assumes that the last field represents "pressures" 711 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 712 Checks if all the pressure dofs in each subdomain have a zero diagonal 713 If not, a change of basis on pressures is not needed 714 since the local Schur complements are already SPD 715 */ 716 has_null_pressures = PETSC_TRUE; 717 have_null = PETSC_TRUE; 718 if (pcbddc->n_ISForDofsLocal) { 719 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 720 721 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 722 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 724 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 725 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 726 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 727 if (!sorted) { 728 ierr = ISSort(pressures);CHKERRQ(ierr); 729 } 730 } else { 731 pressures = NULL; 732 } 733 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 734 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 735 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 736 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 737 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 738 if (!sorted) { 739 ierr = ISSort(zerodiag);CHKERRQ(ierr); 740 } 741 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 742 if (!nz) { 743 if (n) have_null = PETSC_FALSE; 744 has_null_pressures = PETSC_FALSE; 745 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 746 } 747 recompute_zerodiag = PETSC_FALSE; 748 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 749 zerodiag_subs = NULL; 750 pcbddc->benign_n = 0; 751 n_interior_dofs = 0; 752 interior_dofs = NULL; 753 if (pcbddc->current_level) { /* need to compute interior nodes */ 754 PetscInt n,i,j; 755 PetscInt n_neigh,*neigh,*n_shared,**shared; 756 PetscInt *iwork; 757 758 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 759 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 760 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 761 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 762 for (i=0;i<n_neigh;i++) 763 for (j=0;j<n_shared[i];j++) 764 iwork[shared[i][j]] += 1; 765 for (i=0;i<n;i++) 766 if (!iwork[i]) 767 interior_dofs[n_interior_dofs++] = i; 768 ierr = PetscFree(iwork);CHKERRQ(ierr); 769 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 770 } 771 if (has_null_pressures) { 772 IS *subs; 773 PetscInt nsubs,i,j,nl; 774 const PetscInt *idxs; 775 PetscScalar *array; 776 Vec *work; 777 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 778 779 subs = pcbddc->local_subs; 780 nsubs = pcbddc->n_local_subs; 781 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 782 if (pcbddc->current_level) { 783 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 784 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 785 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 786 /* work[0] = 1_p */ 787 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 788 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 789 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 790 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 791 /* work[0] = 1_v */ 792 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 793 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 794 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 795 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 797 } 798 if (nsubs > 1) { 799 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 800 for (i=0;i<nsubs;i++) { 801 ISLocalToGlobalMapping l2g; 802 IS t_zerodiag_subs; 803 PetscInt nl; 804 805 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 806 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 807 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 808 if (nl) { 809 PetscBool valid = PETSC_TRUE; 810 811 if (pcbddc->current_level) { 812 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 813 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 814 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 815 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 816 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 817 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 818 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 819 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 820 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 821 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 822 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 823 for (j=0;j<n_interior_dofs;j++) { 824 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 825 valid = PETSC_FALSE; 826 break; 827 } 828 } 829 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 830 } 831 if (valid && pcbddc->NeumannBoundariesLocal) { 832 IS t_bc; 833 PetscInt nzb; 834 835 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 836 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 837 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 838 if (nzb) valid = PETSC_FALSE; 839 } 840 if (valid && pressures) { 841 IS t_pressure_subs; 842 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 843 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 844 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 845 } 846 if (valid) { 847 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 848 pcbddc->benign_n++; 849 } else { 850 recompute_zerodiag = PETSC_TRUE; 851 } 852 } 853 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 854 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 855 } 856 } else { /* there's just one subdomain (or zero if they have not been detected */ 857 PetscBool valid = PETSC_TRUE; 858 859 if (pcbddc->NeumannBoundariesLocal) { 860 PetscInt nzb; 861 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 862 if (nzb) valid = PETSC_FALSE; 863 } 864 if (valid && pressures) { 865 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 866 } 867 if (valid && pcbddc->current_level) { 868 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 869 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 870 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 871 for (j=0;j<n_interior_dofs;j++) { 872 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 873 valid = PETSC_FALSE; 874 break; 875 } 876 } 877 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 878 } 879 if (valid) { 880 pcbddc->benign_n = 1; 881 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 882 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 883 zerodiag_subs[0] = zerodiag; 884 } 885 } 886 if (pcbddc->current_level) { 887 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 888 } 889 } 890 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 891 892 if (!pcbddc->benign_n) { 893 PetscInt n; 894 895 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 896 recompute_zerodiag = PETSC_FALSE; 897 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 898 if (n) { 899 has_null_pressures = PETSC_FALSE; 900 have_null = PETSC_FALSE; 901 } 902 } 903 904 /* final check for null pressures */ 905 if (zerodiag && pressures) { 906 PetscInt nz,np; 907 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 908 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 909 if (nz != np) have_null = PETSC_FALSE; 910 } 911 912 if (recompute_zerodiag) { 913 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 914 if (pcbddc->benign_n == 1) { 915 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 916 zerodiag = zerodiag_subs[0]; 917 } else { 918 PetscInt i,nzn,*new_idxs; 919 920 nzn = 0; 921 for (i=0;i<pcbddc->benign_n;i++) { 922 PetscInt ns; 923 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 924 nzn += ns; 925 } 926 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 927 nzn = 0; 928 for (i=0;i<pcbddc->benign_n;i++) { 929 PetscInt ns,*idxs; 930 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 931 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 932 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 934 nzn += ns; 935 } 936 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 937 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 938 } 939 have_null = PETSC_FALSE; 940 } 941 942 /* Prepare matrix to compute no-net-flux */ 943 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 944 Mat A,loc_divudotp; 945 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 946 IS row,col,isused = NULL; 947 PetscInt M,N,n,st,n_isused; 948 949 if (pressures) { 950 isused = pressures; 951 } else { 952 isused = zerodiag; 953 } 954 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 955 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 956 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 957 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 958 n_isused = 0; 959 if (isused) { 960 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 961 } 962 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 963 st = st-n_isused; 964 if (n) { 965 const PetscInt *gidxs; 966 967 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 969 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 970 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 971 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 972 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 973 } else { 974 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 975 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 976 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 977 } 978 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 979 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 980 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 981 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 982 ierr = ISDestroy(&row);CHKERRQ(ierr); 983 ierr = ISDestroy(&col);CHKERRQ(ierr); 984 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 985 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 986 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 987 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 988 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 989 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 990 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 991 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 992 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 } 995 996 /* change of basis and p0 dofs */ 997 if (has_null_pressures) { 998 IS zerodiagc; 999 const PetscInt *idxs,*idxsc; 1000 PetscInt i,s,*nnz; 1001 1002 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1003 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1004 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1005 /* local change of basis for pressures */ 1006 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1007 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1008 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1009 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1010 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1011 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1012 for (i=0;i<pcbddc->benign_n;i++) { 1013 PetscInt nzs,j; 1014 1015 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1016 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1017 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1018 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1019 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1020 } 1021 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1022 ierr = PetscFree(nnz);CHKERRQ(ierr); 1023 /* set identity on velocities */ 1024 for (i=0;i<n-nz;i++) { 1025 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1026 } 1027 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1028 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1029 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1030 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1031 /* set change on pressures */ 1032 for (s=0;s<pcbddc->benign_n;s++) { 1033 PetscScalar *array; 1034 PetscInt nzs; 1035 1036 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1037 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1038 for (i=0;i<nzs-1;i++) { 1039 PetscScalar vals[2]; 1040 PetscInt cols[2]; 1041 1042 cols[0] = idxs[i]; 1043 cols[1] = idxs[nzs-1]; 1044 vals[0] = 1.; 1045 vals[1] = 1.; 1046 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1047 } 1048 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1049 for (i=0;i<nzs-1;i++) array[i] = -1.; 1050 array[nzs-1] = 1.; 1051 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1052 /* store local idxs for p0 */ 1053 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1054 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1055 ierr = PetscFree(array);CHKERRQ(ierr); 1056 } 1057 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1058 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1059 /* project if needed */ 1060 if (pcbddc->benign_change_explicit) { 1061 Mat M; 1062 1063 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1064 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1065 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1066 ierr = MatDestroy(&M);CHKERRQ(ierr); 1067 } 1068 /* store global idxs for p0 */ 1069 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1070 } 1071 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1072 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1073 1074 /* determines if the coarse solver will be singular or not */ 1075 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1076 /* determines if the problem has subdomains with 0 pressure block */ 1077 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1078 *zerodiaglocal = zerodiag; 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1084 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1085 { 1086 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1087 PetscScalar *array; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 if (!pcbddc->benign_sf) { 1092 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1093 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1094 } 1095 if (get) { 1096 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1097 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1098 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1099 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1100 } else { 1101 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1102 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1103 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1111 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1112 { 1113 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 /* TODO: add error checking 1118 - avoid nested pop (or push) calls. 1119 - cannot push before pop. 1120 - cannot call this if pcbddc->local_mat is NULL 1121 */ 1122 if (!pcbddc->benign_n) { 1123 PetscFunctionReturn(0); 1124 } 1125 if (pop) { 1126 if (pcbddc->benign_change_explicit) { 1127 IS is_p0; 1128 MatReuse reuse; 1129 1130 /* extract B_0 */ 1131 reuse = MAT_INITIAL_MATRIX; 1132 if (pcbddc->benign_B0) { 1133 reuse = MAT_REUSE_MATRIX; 1134 } 1135 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1136 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1137 /* remove rows and cols from local problem */ 1138 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1139 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1140 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1141 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1142 } else { 1143 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1144 PetscScalar *vals; 1145 PetscInt i,n,*idxs_ins; 1146 1147 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1148 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1149 if (!pcbddc->benign_B0) { 1150 PetscInt *nnz; 1151 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1152 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1153 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1155 for (i=0;i<pcbddc->benign_n;i++) { 1156 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1157 nnz[i] = n - nnz[i]; 1158 } 1159 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1160 ierr = PetscFree(nnz);CHKERRQ(ierr); 1161 } 1162 1163 for (i=0;i<pcbddc->benign_n;i++) { 1164 PetscScalar *array; 1165 PetscInt *idxs,j,nz,cum; 1166 1167 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1168 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1169 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1170 for (j=0;j<nz;j++) vals[j] = 1.; 1171 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1172 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1173 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1174 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1175 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1176 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1177 cum = 0; 1178 for (j=0;j<n;j++) { 1179 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1180 vals[cum] = array[j]; 1181 idxs_ins[cum] = j; 1182 cum++; 1183 } 1184 } 1185 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1186 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1187 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1188 } 1189 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1190 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1191 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1192 } 1193 } else { /* push */ 1194 if (pcbddc->benign_change_explicit) { 1195 PetscInt i; 1196 1197 for (i=0;i<pcbddc->benign_n;i++) { 1198 PetscScalar *B0_vals; 1199 PetscInt *B0_cols,B0_ncol; 1200 1201 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1202 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1203 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1204 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1205 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1206 } 1207 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1208 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1209 } else { 1210 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1218 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1219 { 1220 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1221 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1222 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1223 PetscBLASInt *B_iwork,*B_ifail; 1224 PetscScalar *work,lwork; 1225 PetscScalar *St,*S,*eigv; 1226 PetscScalar *Sarray,*Starray; 1227 PetscReal *eigs,thresh; 1228 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1229 PetscBool allocated_S_St; 1230 #if defined(PETSC_USE_COMPLEX) 1231 PetscReal *rwork; 1232 #endif 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1237 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1238 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1239 1240 if (pcbddc->dbg_flag) { 1241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1244 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1245 } 1246 1247 if (pcbddc->dbg_flag) { 1248 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1249 } 1250 1251 /* max size of subsets */ 1252 mss = 0; 1253 for (i=0;i<sub_schurs->n_subs;i++) { 1254 PetscInt subset_size; 1255 1256 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1257 mss = PetscMax(mss,subset_size); 1258 } 1259 1260 /* min/max and threshold */ 1261 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1262 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1263 nmax = PetscMax(nmin,nmax); 1264 allocated_S_St = PETSC_FALSE; 1265 if (nmin) { 1266 allocated_S_St = PETSC_TRUE; 1267 } 1268 1269 /* allocate lapack workspace */ 1270 cum = cum2 = 0; 1271 maxneigs = 0; 1272 for (i=0;i<sub_schurs->n_subs;i++) { 1273 PetscInt n,subset_size; 1274 1275 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1276 n = PetscMin(subset_size,nmax); 1277 cum += subset_size; 1278 cum2 += subset_size*n; 1279 maxneigs = PetscMax(maxneigs,n); 1280 } 1281 if (mss) { 1282 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1283 PetscBLASInt B_itype = 1; 1284 PetscBLASInt B_N = mss; 1285 PetscReal zero = 0.0; 1286 PetscReal eps = 0.0; /* dlamch? */ 1287 1288 B_lwork = -1; 1289 S = NULL; 1290 St = NULL; 1291 eigs = NULL; 1292 eigv = NULL; 1293 B_iwork = NULL; 1294 B_ifail = NULL; 1295 #if defined(PETSC_USE_COMPLEX) 1296 rwork = NULL; 1297 #endif 1298 thresh = 1.0; 1299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1300 #if defined(PETSC_USE_COMPLEX) 1301 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1302 #else 1303 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1304 #endif 1305 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1306 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1307 } else { 1308 /* TODO */ 1309 } 1310 } else { 1311 lwork = 0; 1312 } 1313 1314 nv = 0; 1315 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1316 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1317 } 1318 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1319 if (allocated_S_St) { 1320 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1321 } 1322 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1323 #if defined(PETSC_USE_COMPLEX) 1324 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1325 #endif 1326 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1327 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1328 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1329 nv+cum,&pcbddc->adaptive_constraints_idxs, 1330 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1331 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1332 1333 maxneigs = 0; 1334 cum = cumarray = 0; 1335 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1336 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1337 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1338 const PetscInt *idxs; 1339 1340 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1341 for (cum=0;cum<nv;cum++) { 1342 pcbddc->adaptive_constraints_n[cum] = 1; 1343 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1344 pcbddc->adaptive_constraints_data[cum] = 1.0; 1345 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1346 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1347 } 1348 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1349 } 1350 1351 if (mss) { /* multilevel */ 1352 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1353 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1354 } 1355 1356 thresh = pcbddc->adaptive_threshold; 1357 for (i=0;i<sub_schurs->n_subs;i++) { 1358 const PetscInt *idxs; 1359 PetscReal upper,lower; 1360 PetscInt j,subset_size,eigs_start = 0; 1361 PetscBLASInt B_N; 1362 PetscBool same_data = PETSC_FALSE; 1363 1364 if (pcbddc->use_deluxe_scaling) { 1365 upper = PETSC_MAX_REAL; 1366 lower = thresh; 1367 } else { 1368 upper = 1./thresh; 1369 lower = 0.; 1370 } 1371 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1372 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1373 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1374 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1375 if (sub_schurs->is_hermitian) { 1376 PetscInt j,k; 1377 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1378 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1379 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1380 } 1381 for (j=0;j<subset_size;j++) { 1382 for (k=j;k<subset_size;k++) { 1383 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1384 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1385 } 1386 } 1387 } else { 1388 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1389 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1390 } 1391 } else { 1392 S = Sarray + cumarray; 1393 St = Starray + cumarray; 1394 } 1395 /* see if we can save some work */ 1396 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1397 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1398 } 1399 1400 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1401 B_neigs = 0; 1402 } else { 1403 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1404 PetscBLASInt B_itype = 1; 1405 PetscBLASInt B_IL, B_IU; 1406 PetscReal eps = -1.0; /* dlamch? */ 1407 PetscInt nmin_s; 1408 PetscBool compute_range = PETSC_FALSE; 1409 1410 if (pcbddc->dbg_flag) { 1411 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1412 } 1413 1414 compute_range = PETSC_FALSE; 1415 if (thresh > 1.+PETSC_SMALL && !same_data) { 1416 compute_range = PETSC_TRUE; 1417 } 1418 1419 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1420 if (compute_range) { 1421 1422 /* ask for eigenvalues larger than thresh */ 1423 #if defined(PETSC_USE_COMPLEX) 1424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1425 #else 1426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1427 #endif 1428 } else if (!same_data) { 1429 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1430 B_IL = 1; 1431 #if defined(PETSC_USE_COMPLEX) 1432 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1433 #else 1434 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1435 #endif 1436 } else { /* same_data is true, so get the adaptive function requested by the user */ 1437 PetscInt k; 1438 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1439 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1440 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1441 nmin = nmax; 1442 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1443 for (k=0;k<nmax;k++) { 1444 eigs[k] = 1./PETSC_SMALL; 1445 eigv[k*(subset_size+1)] = 1.0; 1446 } 1447 } 1448 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1449 if (B_ierr) { 1450 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1451 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1452 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1453 } 1454 1455 if (B_neigs > nmax) { 1456 if (pcbddc->dbg_flag) { 1457 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1458 } 1459 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1460 B_neigs = nmax; 1461 } 1462 1463 nmin_s = PetscMin(nmin,B_N); 1464 if (B_neigs < nmin_s) { 1465 PetscBLASInt B_neigs2; 1466 1467 if (pcbddc->use_deluxe_scaling) { 1468 B_IL = B_N - nmin_s + 1; 1469 B_IU = B_N - B_neigs; 1470 } else { 1471 B_IL = B_neigs + 1; 1472 B_IU = nmin_s; 1473 } 1474 if (pcbddc->dbg_flag) { 1475 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1476 } 1477 if (sub_schurs->is_hermitian) { 1478 PetscInt j,k; 1479 for (j=0;j<subset_size;j++) { 1480 for (k=j;k<subset_size;k++) { 1481 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1482 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1483 } 1484 } 1485 } else { 1486 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1487 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1488 } 1489 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1490 #if defined(PETSC_USE_COMPLEX) 1491 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1492 #else 1493 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1494 #endif 1495 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1496 B_neigs += B_neigs2; 1497 } 1498 if (B_ierr) { 1499 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1500 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1501 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1502 } 1503 if (pcbddc->dbg_flag) { 1504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1505 for (j=0;j<B_neigs;j++) { 1506 if (eigs[j] == 0.0) { 1507 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1508 } else { 1509 if (pcbddc->use_deluxe_scaling) { 1510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1511 } else { 1512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1513 } 1514 } 1515 } 1516 } 1517 } else { 1518 /* TODO */ 1519 } 1520 } 1521 /* change the basis back to the original one */ 1522 if (sub_schurs->change) { 1523 Mat change,phi,phit; 1524 1525 if (pcbddc->dbg_flag > 1) { 1526 PetscInt ii; 1527 for (ii=0;ii<B_neigs;ii++) { 1528 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1529 for (j=0;j<B_N;j++) { 1530 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1531 } 1532 } 1533 } 1534 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1535 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1536 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1537 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1538 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1539 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1540 } 1541 maxneigs = PetscMax(B_neigs,maxneigs); 1542 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1543 if (B_neigs) { 1544 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1545 1546 if (pcbddc->dbg_flag > 1) { 1547 PetscInt ii; 1548 for (ii=0;ii<B_neigs;ii++) { 1549 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1550 for (j=0;j<B_N;j++) { 1551 #if defined(PETSC_USE_COMPLEX) 1552 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1553 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1555 #else 1556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1557 #endif 1558 } 1559 } 1560 } 1561 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1562 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1563 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1564 cum++; 1565 } 1566 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1567 /* shift for next computation */ 1568 cumarray += subset_size*subset_size; 1569 } 1570 if (pcbddc->dbg_flag) { 1571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1572 } 1573 1574 if (mss) { 1575 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1576 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1577 /* destroy matrices (junk) */ 1578 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1579 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1580 } 1581 if (allocated_S_St) { 1582 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1583 } 1584 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1585 #if defined(PETSC_USE_COMPLEX) 1586 ierr = PetscFree(rwork);CHKERRQ(ierr); 1587 #endif 1588 if (pcbddc->dbg_flag) { 1589 PetscInt maxneigs_r; 1590 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "PCBDDCSetUpSolvers" 1598 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1599 { 1600 PetscScalar *coarse_submat_vals; 1601 PetscErrorCode ierr; 1602 1603 PetscFunctionBegin; 1604 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1605 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1606 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1607 1608 /* Setup local neumann solver ksp_R */ 1609 /* PCBDDCSetUpLocalScatters should be called first! */ 1610 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1611 1612 /* 1613 Setup local correction and local part of coarse basis. 1614 Gives back the dense local part of the coarse matrix in column major ordering 1615 */ 1616 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1617 1618 /* Compute total number of coarse nodes and setup coarse solver */ 1619 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1620 1621 /* free */ 1622 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "PCBDDCResetCustomization" 1628 PetscErrorCode PCBDDCResetCustomization(PC pc) 1629 { 1630 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1631 PetscErrorCode ierr; 1632 1633 PetscFunctionBegin; 1634 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1635 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1636 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1637 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1638 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1639 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1640 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1641 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1642 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1643 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1644 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "PCBDDCResetTopography" 1650 PetscErrorCode PCBDDCResetTopography(PC pc) 1651 { 1652 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1653 PetscInt i; 1654 PetscErrorCode ierr; 1655 1656 PetscFunctionBegin; 1657 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1658 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1659 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1660 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1661 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1662 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1663 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1664 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1665 for (i=0;i<pcbddc->n_local_subs;i++) { 1666 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1667 } 1668 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1669 if (pcbddc->sub_schurs) { 1670 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "PCBDDCResetSolvers" 1677 PetscErrorCode PCBDDCResetSolvers(PC pc) 1678 { 1679 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1680 PetscErrorCode ierr; 1681 1682 PetscFunctionBegin; 1683 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1684 if (pcbddc->coarse_phi_B) { 1685 PetscScalar *array; 1686 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1687 ierr = PetscFree(array);CHKERRQ(ierr); 1688 } 1689 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1690 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1691 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1692 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1693 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1694 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1695 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1696 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1697 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1698 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1700 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1701 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1702 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1703 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1704 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1705 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1706 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1707 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1708 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1709 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1710 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1711 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1712 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1713 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1714 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1715 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1716 if (pcbddc->benign_zerodiag_subs) { 1717 PetscInt i; 1718 for (i=0;i<pcbddc->benign_n;i++) { 1719 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1720 } 1721 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1722 } 1723 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1729 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1730 { 1731 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1732 PC_IS *pcis = (PC_IS*)pc->data; 1733 VecType impVecType; 1734 PetscInt n_constraints,n_R,old_size; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1739 /* get sizes */ 1740 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1741 n_R = pcis->n - pcbddc->n_vertices; 1742 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1743 /* local work vectors (try to avoid unneeded work)*/ 1744 /* R nodes */ 1745 old_size = -1; 1746 if (pcbddc->vec1_R) { 1747 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1748 } 1749 if (n_R != old_size) { 1750 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1751 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1752 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1753 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1754 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1755 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1756 } 1757 /* local primal dofs */ 1758 old_size = -1; 1759 if (pcbddc->vec1_P) { 1760 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1761 } 1762 if (pcbddc->local_primal_size != old_size) { 1763 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1764 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1765 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1766 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1767 } 1768 /* local explicit constraints */ 1769 old_size = -1; 1770 if (pcbddc->vec1_C) { 1771 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1772 } 1773 if (n_constraints && n_constraints != old_size) { 1774 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1775 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1776 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1777 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1778 } 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "PCBDDCSetUpCorrection" 1784 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1785 { 1786 PetscErrorCode ierr; 1787 /* pointers to pcis and pcbddc */ 1788 PC_IS* pcis = (PC_IS*)pc->data; 1789 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1790 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1791 /* submatrices of local problem */ 1792 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1793 /* submatrices of local coarse problem */ 1794 Mat S_VV,S_CV,S_VC,S_CC; 1795 /* working matrices */ 1796 Mat C_CR; 1797 /* additional working stuff */ 1798 PC pc_R; 1799 Mat F; 1800 Vec dummy_vec; 1801 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1802 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1803 PetscScalar *work; 1804 PetscInt *idx_V_B; 1805 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1806 PetscInt i,n_R,n_D,n_B; 1807 1808 /* some shortcuts to scalars */ 1809 PetscScalar one=1.0,m_one=-1.0; 1810 1811 PetscFunctionBegin; 1812 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1813 1814 /* Set Non-overlapping dimensions */ 1815 n_vertices = pcbddc->n_vertices; 1816 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1817 n_B = pcis->n_B; 1818 n_D = pcis->n - n_B; 1819 n_R = pcis->n - n_vertices; 1820 1821 /* vertices in boundary numbering */ 1822 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1823 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1824 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1825 1826 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1827 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1829 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1831 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1833 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1835 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1836 1837 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1838 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1839 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1840 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1841 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1842 lda_rhs = n_R; 1843 need_benign_correction = PETSC_FALSE; 1844 if (isLU || isILU || isCHOL) { 1845 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1846 } else if (sub_schurs && sub_schurs->reuse_solver) { 1847 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1848 MatFactorType type; 1849 1850 F = reuse_solver->F; 1851 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1852 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1853 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1854 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1855 } else { 1856 F = NULL; 1857 } 1858 1859 /* allocate workspace */ 1860 n = 0; 1861 if (n_constraints) { 1862 n += lda_rhs*n_constraints; 1863 } 1864 if (n_vertices) { 1865 n = PetscMax(2*lda_rhs*n_vertices,n); 1866 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1867 } 1868 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1869 1870 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1871 dummy_vec = NULL; 1872 if (need_benign_correction && lda_rhs != n_R && F) { 1873 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1874 } 1875 1876 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1877 if (n_constraints) { 1878 Mat M1,M2,M3,C_B; 1879 IS is_aux; 1880 PetscScalar *array,*array2; 1881 1882 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1883 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1884 1885 /* Extract constraints on R nodes: C_{CR} */ 1886 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1887 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1888 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1889 1890 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1891 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1892 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1893 for (i=0;i<n_constraints;i++) { 1894 const PetscScalar *row_cmat_values; 1895 const PetscInt *row_cmat_indices; 1896 PetscInt size_of_constraint,j; 1897 1898 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1899 for (j=0;j<size_of_constraint;j++) { 1900 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1901 } 1902 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1903 } 1904 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1905 if (F) { 1906 Mat B; 1907 1908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1909 if (need_benign_correction) { 1910 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1911 1912 /* rhs is already zero on interior dofs, no need to change the rhs */ 1913 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1914 } 1915 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1916 if (need_benign_correction) { 1917 PetscScalar *marr; 1918 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1919 1920 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1921 if (lda_rhs != n_R) { 1922 for (i=0;i<n_constraints;i++) { 1923 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1924 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1925 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1926 } 1927 } else { 1928 for (i=0;i<n_constraints;i++) { 1929 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1930 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1932 } 1933 } 1934 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1935 } 1936 ierr = MatDestroy(&B);CHKERRQ(ierr); 1937 } else { 1938 PetscScalar *marr; 1939 1940 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1941 for (i=0;i<n_constraints;i++) { 1942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1944 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1945 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1946 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1947 } 1948 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1949 } 1950 if (!pcbddc->switch_static) { 1951 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1952 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1953 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1954 for (i=0;i<n_constraints;i++) { 1955 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1956 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1957 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1959 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1960 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1961 } 1962 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1963 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1964 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1965 } else { 1966 if (lda_rhs != n_R) { 1967 IS dummy; 1968 1969 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1970 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1971 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1972 } else { 1973 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1974 pcbddc->local_auxmat2 = local_auxmat2_R; 1975 } 1976 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1977 } 1978 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1979 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1980 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1981 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1982 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1983 if (isCHOL) { 1984 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1985 } else { 1986 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1987 } 1988 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1989 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1990 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1991 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1992 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1993 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1994 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1995 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1996 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1997 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1998 } 1999 2000 /* Get submatrices from subdomain matrix */ 2001 if (n_vertices) { 2002 IS is_aux; 2003 2004 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2005 IS tis; 2006 2007 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2008 ierr = ISSort(tis);CHKERRQ(ierr); 2009 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2010 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2011 } else { 2012 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2013 } 2014 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2015 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2016 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2017 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2018 } 2019 2020 /* Matrix of coarse basis functions (local) */ 2021 if (pcbddc->coarse_phi_B) { 2022 PetscInt on_B,on_primal,on_D=n_D; 2023 if (pcbddc->coarse_phi_D) { 2024 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2025 } 2026 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2027 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2028 PetscScalar *marray; 2029 2030 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2031 ierr = PetscFree(marray);CHKERRQ(ierr); 2032 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2033 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2034 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2035 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2036 } 2037 } 2038 2039 if (!pcbddc->coarse_phi_B) { 2040 PetscScalar *marray; 2041 2042 n = n_B*pcbddc->local_primal_size; 2043 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2044 n += n_D*pcbddc->local_primal_size; 2045 } 2046 if (!pcbddc->symmetric_primal) { 2047 n *= 2; 2048 } 2049 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2050 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2051 n = n_B*pcbddc->local_primal_size; 2052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2054 n += n_D*pcbddc->local_primal_size; 2055 } 2056 if (!pcbddc->symmetric_primal) { 2057 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2058 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2059 n = n_B*pcbddc->local_primal_size; 2060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2061 } 2062 } else { 2063 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2064 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2065 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2066 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2067 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2068 } 2069 } 2070 } 2071 2072 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2073 p0_lidx_I = NULL; 2074 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2075 const PetscInt *idxs; 2076 2077 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2078 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2079 for (i=0;i<pcbddc->benign_n;i++) { 2080 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2081 } 2082 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2083 } 2084 2085 /* vertices */ 2086 if (n_vertices) { 2087 2088 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2089 2090 if (n_R) { 2091 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2092 PetscBLASInt B_N,B_one = 1; 2093 PetscScalar *x,*y; 2094 PetscBool isseqaij; 2095 2096 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2097 if (need_benign_correction) { 2098 ISLocalToGlobalMapping RtoN; 2099 IS is_p0; 2100 PetscInt *idxs_p0,n; 2101 2102 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2103 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2104 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2105 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2106 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2107 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2108 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2109 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2110 } 2111 2112 if (lda_rhs == n_R) { 2113 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2114 } else { 2115 PetscScalar *av,*array; 2116 const PetscInt *xadj,*adjncy; 2117 PetscInt n; 2118 PetscBool flg_row; 2119 2120 array = work+lda_rhs*n_vertices; 2121 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2122 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2123 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2124 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2125 for (i=0;i<n;i++) { 2126 PetscInt j; 2127 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2128 } 2129 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2130 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2131 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2132 } 2133 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2134 if (need_benign_correction) { 2135 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2136 PetscScalar *marr; 2137 2138 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2139 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2140 2141 | 0 0 0 | (V) 2142 L = | 0 0 -1 | (P-p0) 2143 | 0 0 -1 | (p0) 2144 2145 */ 2146 for (i=0;i<reuse_solver->benign_n;i++) { 2147 const PetscScalar *vals; 2148 const PetscInt *idxs,*idxs_zero; 2149 PetscInt n,j,nz; 2150 2151 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2152 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2153 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2154 for (j=0;j<n;j++) { 2155 PetscScalar val = vals[j]; 2156 PetscInt k,col = idxs[j]; 2157 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2158 } 2159 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2160 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2161 } 2162 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2163 } 2164 if (F) { 2165 /* need to correct the rhs */ 2166 if (need_benign_correction) { 2167 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2168 PetscScalar *marr; 2169 2170 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2171 if (lda_rhs != n_R) { 2172 for (i=0;i<n_vertices;i++) { 2173 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2174 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2175 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2176 } 2177 } else { 2178 for (i=0;i<n_vertices;i++) { 2179 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2180 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2181 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2182 } 2183 } 2184 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2185 } 2186 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2187 /* need to correct the solution */ 2188 if (need_benign_correction) { 2189 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2190 PetscScalar *marr; 2191 2192 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2193 if (lda_rhs != n_R) { 2194 for (i=0;i<n_vertices;i++) { 2195 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2196 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2198 } 2199 } else { 2200 for (i=0;i<n_vertices;i++) { 2201 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2202 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2204 } 2205 } 2206 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2207 } 2208 } else { 2209 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2210 for (i=0;i<n_vertices;i++) { 2211 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2212 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2213 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2214 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2215 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2216 } 2217 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2218 } 2219 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2220 /* S_VV and S_CV */ 2221 if (n_constraints) { 2222 Mat B; 2223 2224 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2225 for (i=0;i<n_vertices;i++) { 2226 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2227 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2228 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2229 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2230 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2231 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2232 } 2233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2234 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2235 ierr = MatDestroy(&B);CHKERRQ(ierr); 2236 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2237 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2238 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2239 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2240 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2241 ierr = MatDestroy(&B);CHKERRQ(ierr); 2242 } 2243 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2244 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2245 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2246 } 2247 if (lda_rhs != n_R) { 2248 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2250 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2251 } 2252 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2253 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2254 if (need_benign_correction) { 2255 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2256 PetscScalar *marr,*sums; 2257 2258 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2259 ierr = MatDenseGetArray(S_VVt,&marr); 2260 for (i=0;i<reuse_solver->benign_n;i++) { 2261 const PetscScalar *vals; 2262 const PetscInt *idxs,*idxs_zero; 2263 PetscInt n,j,nz; 2264 2265 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2266 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2267 for (j=0;j<n_vertices;j++) { 2268 PetscInt k; 2269 sums[j] = 0.; 2270 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2271 } 2272 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2273 for (j=0;j<n;j++) { 2274 PetscScalar val = vals[j]; 2275 PetscInt k; 2276 for (k=0;k<n_vertices;k++) { 2277 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2278 } 2279 } 2280 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2281 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2282 } 2283 ierr = PetscFree(sums);CHKERRQ(ierr); 2284 ierr = MatDenseRestoreArray(S_VVt,&marr); 2285 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2286 } 2287 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2288 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2289 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2290 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2291 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2292 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2293 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2294 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2295 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2296 } else { 2297 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2298 } 2299 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2300 2301 /* coarse basis functions */ 2302 for (i=0;i<n_vertices;i++) { 2303 PetscScalar *y; 2304 2305 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2306 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2307 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2308 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2309 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2310 y[n_B*i+idx_V_B[i]] = 1.0; 2311 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2312 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2313 2314 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2315 PetscInt j; 2316 2317 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2318 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2319 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2320 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2321 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2322 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2323 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2324 } 2325 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2326 } 2327 /* if n_R == 0 the object is not destroyed */ 2328 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2329 } 2330 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2331 2332 if (n_constraints) { 2333 Mat B; 2334 2335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2336 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2337 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2338 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2339 if (n_vertices) { 2340 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2341 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2342 } else { 2343 Mat S_VCt; 2344 2345 if (lda_rhs != n_R) { 2346 ierr = MatDestroy(&B);CHKERRQ(ierr); 2347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2348 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2349 } 2350 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2351 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2352 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2353 } 2354 } 2355 ierr = MatDestroy(&B);CHKERRQ(ierr); 2356 /* coarse basis functions */ 2357 for (i=0;i<n_constraints;i++) { 2358 PetscScalar *y; 2359 2360 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2361 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2362 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2363 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2364 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2365 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2366 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2368 PetscInt j; 2369 2370 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2371 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2372 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2373 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2374 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2375 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2376 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2377 } 2378 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2379 } 2380 } 2381 if (n_constraints) { 2382 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2383 } 2384 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2385 2386 /* coarse matrix entries relative to B_0 */ 2387 if (pcbddc->benign_n) { 2388 Mat B0_B,B0_BPHI; 2389 IS is_dummy; 2390 PetscScalar *data; 2391 PetscInt j; 2392 2393 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2394 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2395 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2396 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2397 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2398 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2399 for (j=0;j<pcbddc->benign_n;j++) { 2400 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2401 for (i=0;i<pcbddc->local_primal_size;i++) { 2402 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2403 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2404 } 2405 } 2406 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2407 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2408 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2409 } 2410 2411 /* compute other basis functions for non-symmetric problems */ 2412 if (!pcbddc->symmetric_primal) { 2413 Mat B_V=NULL,B_C=NULL; 2414 PetscScalar *marray; 2415 2416 if (n_constraints) { 2417 Mat S_CCT,C_CRT; 2418 2419 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2420 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2421 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2422 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2423 if (n_vertices) { 2424 Mat S_VCT; 2425 2426 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2427 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2428 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2429 } 2430 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2431 } else { 2432 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2433 } 2434 if (n_vertices && n_R) { 2435 PetscScalar *av,*marray; 2436 const PetscInt *xadj,*adjncy; 2437 PetscInt n; 2438 PetscBool flg_row; 2439 2440 /* B_V = B_V - A_VR^T */ 2441 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2442 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2443 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2444 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2445 for (i=0;i<n;i++) { 2446 PetscInt j; 2447 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2448 } 2449 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2450 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2451 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2452 } 2453 2454 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2455 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2456 for (i=0;i<n_vertices;i++) { 2457 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2458 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2459 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2460 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2461 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2462 } 2463 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2464 if (B_C) { 2465 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2466 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2467 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2468 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2469 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2470 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2471 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2472 } 2473 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2474 } 2475 /* coarse basis functions */ 2476 for (i=0;i<pcbddc->local_primal_size;i++) { 2477 PetscScalar *y; 2478 2479 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2480 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2481 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2482 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2483 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2484 if (i<n_vertices) { 2485 y[n_B*i+idx_V_B[i]] = 1.0; 2486 } 2487 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2488 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2489 2490 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2491 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2492 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2493 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2494 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2495 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2496 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2497 } 2498 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2499 } 2500 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2501 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2502 } 2503 /* free memory */ 2504 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2505 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2506 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2507 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2508 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2509 ierr = PetscFree(work);CHKERRQ(ierr); 2510 if (n_vertices) { 2511 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2512 } 2513 if (n_constraints) { 2514 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2515 } 2516 /* Checking coarse_sub_mat and coarse basis functios */ 2517 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2518 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2519 if (pcbddc->dbg_flag) { 2520 Mat coarse_sub_mat; 2521 Mat AUXMAT,TM1,TM2,TM3,TM4; 2522 Mat coarse_phi_D,coarse_phi_B; 2523 Mat coarse_psi_D,coarse_psi_B; 2524 Mat A_II,A_BB,A_IB,A_BI; 2525 Mat C_B,CPHI; 2526 IS is_dummy; 2527 Vec mones; 2528 MatType checkmattype=MATSEQAIJ; 2529 PetscReal real_value; 2530 2531 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2532 Mat A; 2533 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2534 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2535 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2536 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2537 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2538 ierr = MatDestroy(&A);CHKERRQ(ierr); 2539 } else { 2540 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2541 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2542 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2543 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2544 } 2545 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2546 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2547 if (!pcbddc->symmetric_primal) { 2548 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2549 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2550 } 2551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2552 2553 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2554 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2555 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2556 if (!pcbddc->symmetric_primal) { 2557 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2558 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2559 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2560 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2561 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2562 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2563 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2564 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2565 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2566 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2567 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2568 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2569 } else { 2570 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2571 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2572 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2573 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2574 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2575 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2577 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2578 } 2579 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2580 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2581 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2582 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2583 if (pcbddc->benign_n) { 2584 Mat B0_B,B0_BPHI; 2585 PetscScalar *data,*data2; 2586 PetscInt j; 2587 2588 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2589 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2590 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2591 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2592 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2593 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2594 for (j=0;j<pcbddc->benign_n;j++) { 2595 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2596 for (i=0;i<pcbddc->local_primal_size;i++) { 2597 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2598 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2599 } 2600 } 2601 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2602 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2603 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2604 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2605 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2606 } 2607 #if 0 2608 { 2609 PetscViewer viewer; 2610 char filename[256]; 2611 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2612 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2613 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2614 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2615 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2616 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2617 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2618 if (save_change) { 2619 Mat phi_B; 2620 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2621 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2622 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2623 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2624 } else { 2625 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2626 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2627 } 2628 if (pcbddc->coarse_phi_D) { 2629 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2630 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2631 } 2632 if (pcbddc->coarse_psi_B) { 2633 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2634 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2635 } 2636 if (pcbddc->coarse_psi_D) { 2637 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2638 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2639 } 2640 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2641 } 2642 #endif 2643 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2644 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2645 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2647 2648 /* check constraints */ 2649 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2650 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2651 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2652 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2653 } else { 2654 PetscScalar *data; 2655 Mat tmat; 2656 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2657 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2658 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2659 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2660 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2661 } 2662 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2663 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2664 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2665 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2666 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2667 if (!pcbddc->symmetric_primal) { 2668 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2669 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2670 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2671 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2673 } 2674 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2675 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2676 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2677 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2679 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2680 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2681 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2682 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2683 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2684 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2685 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2686 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2687 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2688 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2689 if (!pcbddc->symmetric_primal) { 2690 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2691 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2692 } 2693 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2694 } 2695 /* get back data */ 2696 *coarse_submat_vals_n = coarse_submat_vals; 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2702 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2703 { 2704 Mat *work_mat; 2705 IS isrow_s,iscol_s; 2706 PetscBool rsorted,csorted; 2707 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2712 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2713 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2714 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2715 2716 if (!rsorted) { 2717 const PetscInt *idxs; 2718 PetscInt *idxs_sorted,i; 2719 2720 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2721 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2722 for (i=0;i<rsize;i++) { 2723 idxs_perm_r[i] = i; 2724 } 2725 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2726 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2727 for (i=0;i<rsize;i++) { 2728 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2729 } 2730 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2731 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2732 } else { 2733 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2734 isrow_s = isrow; 2735 } 2736 2737 if (!csorted) { 2738 if (isrow == iscol) { 2739 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2740 iscol_s = isrow_s; 2741 } else { 2742 const PetscInt *idxs; 2743 PetscInt *idxs_sorted,i; 2744 2745 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2746 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2747 for (i=0;i<csize;i++) { 2748 idxs_perm_c[i] = i; 2749 } 2750 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2751 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2752 for (i=0;i<csize;i++) { 2753 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2754 } 2755 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2756 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2757 } 2758 } else { 2759 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2760 iscol_s = iscol; 2761 } 2762 2763 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2764 2765 if (!rsorted || !csorted) { 2766 Mat new_mat; 2767 IS is_perm_r,is_perm_c; 2768 2769 if (!rsorted) { 2770 PetscInt *idxs_r,i; 2771 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2772 for (i=0;i<rsize;i++) { 2773 idxs_r[idxs_perm_r[i]] = i; 2774 } 2775 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2776 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2777 } else { 2778 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2779 } 2780 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2781 2782 if (!csorted) { 2783 if (isrow_s == iscol_s) { 2784 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2785 is_perm_c = is_perm_r; 2786 } else { 2787 PetscInt *idxs_c,i; 2788 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2789 for (i=0;i<csize;i++) { 2790 idxs_c[idxs_perm_c[i]] = i; 2791 } 2792 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2793 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2794 } 2795 } else { 2796 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2797 } 2798 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2799 2800 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2801 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2802 work_mat[0] = new_mat; 2803 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2804 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2808 *B = work_mat[0]; 2809 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2810 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2811 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2817 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2818 { 2819 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2820 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2821 Mat new_mat; 2822 IS is_local,is_global; 2823 PetscInt local_size; 2824 PetscBool isseqaij; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2829 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2830 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2831 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2832 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2833 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2834 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2835 2836 /* check */ 2837 if (pcbddc->dbg_flag) { 2838 Vec x,x_change; 2839 PetscReal error; 2840 2841 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2842 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2843 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2844 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2846 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2847 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2848 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2849 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2850 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2851 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2852 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2853 ierr = VecDestroy(&x);CHKERRQ(ierr); 2854 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2855 } 2856 2857 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2858 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2859 if (isseqaij) { 2860 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2862 } else { 2863 Mat work_mat; 2864 2865 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2866 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2867 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2868 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2869 } 2870 if (matis->A->symmetric_set) { 2871 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2872 #if !defined(PETSC_USE_COMPLEX) 2873 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2874 #endif 2875 } 2876 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2882 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2883 { 2884 PC_IS* pcis = (PC_IS*)(pc->data); 2885 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2886 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2887 PetscInt *idx_R_local=NULL; 2888 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2889 PetscInt vbs,bs; 2890 PetscBT bitmask=NULL; 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 /* 2895 No need to setup local scatters if 2896 - primal space is unchanged 2897 AND 2898 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2899 AND 2900 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2901 */ 2902 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2903 PetscFunctionReturn(0); 2904 } 2905 /* destroy old objects */ 2906 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2907 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2908 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2909 /* Set Non-overlapping dimensions */ 2910 n_B = pcis->n_B; 2911 n_D = pcis->n - n_B; 2912 n_vertices = pcbddc->n_vertices; 2913 2914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2915 2916 /* create auxiliary bitmask and allocate workspace */ 2917 if (!sub_schurs || !sub_schurs->reuse_solver) { 2918 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2919 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2920 for (i=0;i<n_vertices;i++) { 2921 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2922 } 2923 2924 for (i=0, n_R=0; i<pcis->n; i++) { 2925 if (!PetscBTLookup(bitmask,i)) { 2926 idx_R_local[n_R++] = i; 2927 } 2928 } 2929 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2931 2932 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2933 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2934 } 2935 2936 /* Block code */ 2937 vbs = 1; 2938 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2939 if (bs>1 && !(n_vertices%bs)) { 2940 PetscBool is_blocked = PETSC_TRUE; 2941 PetscInt *vary; 2942 if (!sub_schurs || !sub_schurs->reuse_solver) { 2943 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2944 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2945 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2946 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2947 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2948 for (i=0; i<pcis->n/bs; i++) { 2949 if (vary[i]!=0 && vary[i]!=bs) { 2950 is_blocked = PETSC_FALSE; 2951 break; 2952 } 2953 } 2954 ierr = PetscFree(vary);CHKERRQ(ierr); 2955 } else { 2956 /* Verify directly the R set */ 2957 for (i=0; i<n_R/bs; i++) { 2958 PetscInt j,node=idx_R_local[bs*i]; 2959 for (j=1; j<bs; j++) { 2960 if (node != idx_R_local[bs*i+j]-j) { 2961 is_blocked = PETSC_FALSE; 2962 break; 2963 } 2964 } 2965 } 2966 } 2967 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2968 vbs = bs; 2969 for (i=0;i<n_R/vbs;i++) { 2970 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2971 } 2972 } 2973 } 2974 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2975 if (sub_schurs && sub_schurs->reuse_solver) { 2976 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2977 2978 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2979 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2980 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2981 reuse_solver->is_R = pcbddc->is_R_local; 2982 } else { 2983 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2984 } 2985 2986 /* print some info if requested */ 2987 if (pcbddc->dbg_flag) { 2988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2990 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2995 } 2996 2997 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2998 if (!sub_schurs || !sub_schurs->reuse_solver) { 2999 IS is_aux1,is_aux2; 3000 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3001 3002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3003 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3006 for (i=0; i<n_D; i++) { 3007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3008 } 3009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3010 for (i=0, j=0; i<n_R; i++) { 3011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3012 aux_array1[j++] = i; 3013 } 3014 } 3015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3017 for (i=0, j=0; i<n_B; i++) { 3018 if (!PetscBTLookup(bitmask,is_indices[i])) { 3019 aux_array2[j++] = i; 3020 } 3021 } 3022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3027 3028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3030 for (i=0, j=0; i<n_R; i++) { 3031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3032 aux_array1[j++] = i; 3033 } 3034 } 3035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3038 } 3039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3041 } else { 3042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3043 IS tis; 3044 PetscInt schur_size; 3045 3046 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3047 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3048 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3049 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3050 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3051 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3052 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3053 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3054 } 3055 } 3056 PetscFunctionReturn(0); 3057 } 3058 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3062 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3063 { 3064 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3065 PC_IS *pcis = (PC_IS*)pc->data; 3066 PC pc_temp; 3067 Mat A_RR; 3068 MatReuse reuse; 3069 PetscScalar m_one = -1.0; 3070 PetscReal value; 3071 PetscInt n_D,n_R; 3072 PetscBool check_corr[2],issbaij; 3073 PetscErrorCode ierr; 3074 /* prefixes stuff */ 3075 char dir_prefix[256],neu_prefix[256],str_level[16]; 3076 size_t len; 3077 3078 PetscFunctionBegin; 3079 3080 /* compute prefixes */ 3081 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3082 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3083 if (!pcbddc->current_level) { 3084 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3085 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3086 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3087 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3088 } else { 3089 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3090 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3091 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3092 len -= 15; /* remove "pc_bddc_coarse_" */ 3093 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3094 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3095 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3096 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3097 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3098 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3099 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3100 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3101 } 3102 3103 /* DIRICHLET PROBLEM */ 3104 if (dirichlet) { 3105 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3106 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3107 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3108 if (pcbddc->dbg_flag) { 3109 Mat A_IIn; 3110 3111 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3112 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3113 pcis->A_II = A_IIn; 3114 } 3115 } 3116 if (pcbddc->local_mat->symmetric_set) { 3117 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3118 } 3119 /* Matrix for Dirichlet problem is pcis->A_II */ 3120 n_D = pcis->n - pcis->n_B; 3121 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3122 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3123 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3124 /* default */ 3125 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3126 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3127 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3128 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3129 if (issbaij) { 3130 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3131 } else { 3132 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3133 } 3134 /* Allow user's customization */ 3135 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3136 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3137 } 3138 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3139 if (sub_schurs && sub_schurs->reuse_solver) { 3140 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3141 3142 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3143 } 3144 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3145 if (!n_D) { 3146 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3147 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3148 } 3149 /* Set Up KSP for Dirichlet problem of BDDC */ 3150 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3151 /* set ksp_D into pcis data */ 3152 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3153 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3154 pcis->ksp_D = pcbddc->ksp_D; 3155 } 3156 3157 /* NEUMANN PROBLEM */ 3158 A_RR = 0; 3159 if (neumann) { 3160 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3161 PetscInt ibs,mbs; 3162 PetscBool issbaij; 3163 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3164 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3165 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3166 if (pcbddc->ksp_R) { /* already created ksp */ 3167 PetscInt nn_R; 3168 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3169 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3170 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3171 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3172 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3173 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3174 reuse = MAT_INITIAL_MATRIX; 3175 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3176 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3177 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3178 reuse = MAT_INITIAL_MATRIX; 3179 } else { /* safe to reuse the matrix */ 3180 reuse = MAT_REUSE_MATRIX; 3181 } 3182 } 3183 /* last check */ 3184 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3185 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3186 reuse = MAT_INITIAL_MATRIX; 3187 } 3188 } else { /* first time, so we need to create the matrix */ 3189 reuse = MAT_INITIAL_MATRIX; 3190 } 3191 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3192 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3193 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3194 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3195 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3196 if (matis->A == pcbddc->local_mat) { 3197 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3198 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3199 } else { 3200 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3201 } 3202 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3203 if (matis->A == pcbddc->local_mat) { 3204 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3205 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3206 } else { 3207 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3208 } 3209 } 3210 /* extract A_RR */ 3211 if (sub_schurs && sub_schurs->reuse_solver) { 3212 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3213 3214 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3215 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3216 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3217 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3218 } else { 3219 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3220 } 3221 } else { 3222 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3223 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3224 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3225 } 3226 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3227 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3228 } 3229 if (pcbddc->local_mat->symmetric_set) { 3230 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3231 } 3232 if (!pcbddc->ksp_R) { /* create object if not present */ 3233 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3234 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3235 /* default */ 3236 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3237 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3238 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3239 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3240 if (issbaij) { 3241 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3242 } else { 3243 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3244 } 3245 /* Allow user's customization */ 3246 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3247 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3248 } 3249 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3250 if (!n_R) { 3251 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3252 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3253 } 3254 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3255 /* Reuse solver if it is present */ 3256 if (sub_schurs && sub_schurs->reuse_solver) { 3257 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3258 3259 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3260 } 3261 /* Set Up KSP for Neumann problem of BDDC */ 3262 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3263 } 3264 3265 if (pcbddc->dbg_flag) { 3266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3267 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3269 } 3270 3271 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3272 check_corr[0] = check_corr[1] = PETSC_FALSE; 3273 if (pcbddc->NullSpace_corr[0]) { 3274 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3275 } 3276 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3277 check_corr[0] = PETSC_TRUE; 3278 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3279 } 3280 if (neumann && pcbddc->NullSpace_corr[2]) { 3281 check_corr[1] = PETSC_TRUE; 3282 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3283 } 3284 3285 /* check Dirichlet and Neumann solvers */ 3286 if (pcbddc->dbg_flag) { 3287 if (dirichlet) { /* Dirichlet */ 3288 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3289 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3290 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3291 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3292 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3294 if (check_corr[0]) { 3295 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3296 } 3297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3298 } 3299 if (neumann) { /* Neumann */ 3300 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3301 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3302 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3303 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3304 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3305 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3306 if (check_corr[1]) { 3307 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3308 } 3309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3310 } 3311 } 3312 /* free Neumann problem's matrix */ 3313 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3319 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3320 { 3321 PetscErrorCode ierr; 3322 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3323 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3324 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3325 3326 PetscFunctionBegin; 3327 if (!reuse_solver) { 3328 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3329 } 3330 if (!pcbddc->switch_static) { 3331 if (applytranspose && pcbddc->local_auxmat1) { 3332 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3333 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3334 } 3335 if (!reuse_solver) { 3336 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3337 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3338 } else { 3339 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3340 3341 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3343 } 3344 } else { 3345 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3346 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3347 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3348 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3349 if (applytranspose && pcbddc->local_auxmat1) { 3350 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3351 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3352 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3353 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3354 } 3355 } 3356 if (!reuse_solver || pcbddc->switch_static) { 3357 if (applytranspose) { 3358 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3359 } else { 3360 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3361 } 3362 } else { 3363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3364 3365 if (applytranspose) { 3366 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3367 } else { 3368 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3369 } 3370 } 3371 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3372 if (!pcbddc->switch_static) { 3373 if (!reuse_solver) { 3374 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3375 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3376 } else { 3377 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3378 3379 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3380 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3381 } 3382 if (!applytranspose && pcbddc->local_auxmat1) { 3383 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3384 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3385 } 3386 } else { 3387 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3388 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3389 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3390 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3391 if (!applytranspose && pcbddc->local_auxmat1) { 3392 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3393 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3394 } 3395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3397 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3398 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3399 } 3400 PetscFunctionReturn(0); 3401 } 3402 3403 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3406 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3407 { 3408 PetscErrorCode ierr; 3409 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3410 PC_IS* pcis = (PC_IS*) (pc->data); 3411 const PetscScalar zero = 0.0; 3412 3413 PetscFunctionBegin; 3414 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3415 if (!pcbddc->benign_apply_coarse_only) { 3416 if (applytranspose) { 3417 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3418 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3419 } else { 3420 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3421 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3422 } 3423 } else { 3424 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3425 } 3426 3427 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3428 if (pcbddc->benign_n) { 3429 PetscScalar *array; 3430 PetscInt j; 3431 3432 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3433 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3434 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3435 } 3436 3437 /* start communications from local primal nodes to rhs of coarse solver */ 3438 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3439 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3440 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3441 3442 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3443 if (pcbddc->coarse_ksp) { 3444 Mat coarse_mat; 3445 Vec rhs,sol; 3446 MatNullSpace nullsp; 3447 PetscBool isbddc = PETSC_FALSE; 3448 3449 if (pcbddc->benign_have_null) { 3450 PC coarse_pc; 3451 3452 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3453 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3454 /* we need to propagate to coarser levels the need for a possible benign correction */ 3455 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3456 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3457 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3458 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3459 } 3460 } 3461 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3462 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3463 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3464 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3465 if (nullsp) { 3466 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3467 } 3468 if (applytranspose) { 3469 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3470 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3471 } else { 3472 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3473 PC coarse_pc; 3474 3475 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3476 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3477 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3478 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3479 } else { 3480 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3481 } 3482 } 3483 /* we don't need the benign correction at coarser levels anymore */ 3484 if (pcbddc->benign_have_null && isbddc) { 3485 PC coarse_pc; 3486 PC_BDDC* coarsepcbddc; 3487 3488 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3489 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3490 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3491 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3492 } 3493 if (nullsp) { 3494 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3495 } 3496 } 3497 3498 /* Local solution on R nodes */ 3499 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3500 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3501 } 3502 /* communications from coarse sol to local primal nodes */ 3503 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3504 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3505 3506 /* Sum contributions from the two levels */ 3507 if (!pcbddc->benign_apply_coarse_only) { 3508 if (applytranspose) { 3509 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3510 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3511 } else { 3512 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3513 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3514 } 3515 /* store p0 */ 3516 if (pcbddc->benign_n) { 3517 PetscScalar *array; 3518 PetscInt j; 3519 3520 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3521 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3522 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3523 } 3524 } else { /* expand the coarse solution */ 3525 if (applytranspose) { 3526 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3527 } else { 3528 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3529 } 3530 } 3531 PetscFunctionReturn(0); 3532 } 3533 3534 #undef __FUNCT__ 3535 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3536 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3537 { 3538 PetscErrorCode ierr; 3539 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3540 PetscScalar *array; 3541 Vec from,to; 3542 3543 PetscFunctionBegin; 3544 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3545 from = pcbddc->coarse_vec; 3546 to = pcbddc->vec1_P; 3547 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3548 Vec tvec; 3549 3550 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3551 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3552 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3553 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3554 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3555 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3556 } 3557 } else { /* from local to global -> put data in coarse right hand side */ 3558 from = pcbddc->vec1_P; 3559 to = pcbddc->coarse_vec; 3560 } 3561 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3567 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3568 { 3569 PetscErrorCode ierr; 3570 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3571 PetscScalar *array; 3572 Vec from,to; 3573 3574 PetscFunctionBegin; 3575 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3576 from = pcbddc->coarse_vec; 3577 to = pcbddc->vec1_P; 3578 } else { /* from local to global -> put data in coarse right hand side */ 3579 from = pcbddc->vec1_P; 3580 to = pcbddc->coarse_vec; 3581 } 3582 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3583 if (smode == SCATTER_FORWARD) { 3584 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3585 Vec tvec; 3586 3587 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3588 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3589 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3590 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3591 } 3592 } else { 3593 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3594 ierr = VecResetArray(from);CHKERRQ(ierr); 3595 } 3596 } 3597 PetscFunctionReturn(0); 3598 } 3599 3600 /* uncomment for testing purposes */ 3601 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3602 #undef __FUNCT__ 3603 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3604 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3605 { 3606 PetscErrorCode ierr; 3607 PC_IS* pcis = (PC_IS*)(pc->data); 3608 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3609 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3610 /* one and zero */ 3611 PetscScalar one=1.0,zero=0.0; 3612 /* space to store constraints and their local indices */ 3613 PetscScalar *constraints_data; 3614 PetscInt *constraints_idxs,*constraints_idxs_B; 3615 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3616 PetscInt *constraints_n; 3617 /* iterators */ 3618 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3619 /* BLAS integers */ 3620 PetscBLASInt lwork,lierr; 3621 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3622 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3623 /* reuse */ 3624 PetscInt olocal_primal_size,olocal_primal_size_cc; 3625 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3626 /* change of basis */ 3627 PetscBool qr_needed; 3628 PetscBT change_basis,qr_needed_idx; 3629 /* auxiliary stuff */ 3630 PetscInt *nnz,*is_indices; 3631 PetscInt ncc; 3632 /* some quantities */ 3633 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3634 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3635 3636 PetscFunctionBegin; 3637 /* Destroy Mat objects computed previously */ 3638 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3639 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3640 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3641 /* save info on constraints from previous setup (if any) */ 3642 olocal_primal_size = pcbddc->local_primal_size; 3643 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3644 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3645 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3646 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3647 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3648 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3649 3650 if (!pcbddc->adaptive_selection) { 3651 IS ISForVertices,*ISForFaces,*ISForEdges; 3652 MatNullSpace nearnullsp; 3653 const Vec *nearnullvecs; 3654 Vec *localnearnullsp; 3655 PetscScalar *array; 3656 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3657 PetscBool nnsp_has_cnst; 3658 /* LAPACK working arrays for SVD or POD */ 3659 PetscBool skip_lapack,boolforchange; 3660 PetscScalar *work; 3661 PetscReal *singular_vals; 3662 #if defined(PETSC_USE_COMPLEX) 3663 PetscReal *rwork; 3664 #endif 3665 #if defined(PETSC_MISSING_LAPACK_GESVD) 3666 PetscScalar *temp_basis,*correlation_mat; 3667 #else 3668 PetscBLASInt dummy_int=1; 3669 PetscScalar dummy_scalar=1.; 3670 #endif 3671 3672 /* Get index sets for faces, edges and vertices from graph */ 3673 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3674 /* print some info */ 3675 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3676 PetscInt nv; 3677 3678 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3679 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3680 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3685 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3686 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3687 } 3688 3689 /* free unneeded index sets */ 3690 if (!pcbddc->use_vertices) { 3691 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3692 } 3693 if (!pcbddc->use_edges) { 3694 for (i=0;i<n_ISForEdges;i++) { 3695 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3696 } 3697 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3698 n_ISForEdges = 0; 3699 } 3700 if (!pcbddc->use_faces) { 3701 for (i=0;i<n_ISForFaces;i++) { 3702 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3703 } 3704 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3705 n_ISForFaces = 0; 3706 } 3707 3708 /* check if near null space is attached to global mat */ 3709 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3710 if (nearnullsp) { 3711 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3712 /* remove any stored info */ 3713 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3714 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3715 /* store information for BDDC solver reuse */ 3716 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3717 pcbddc->onearnullspace = nearnullsp; 3718 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3719 for (i=0;i<nnsp_size;i++) { 3720 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3721 } 3722 } else { /* if near null space is not provided BDDC uses constants by default */ 3723 nnsp_size = 0; 3724 nnsp_has_cnst = PETSC_TRUE; 3725 } 3726 /* get max number of constraints on a single cc */ 3727 max_constraints = nnsp_size; 3728 if (nnsp_has_cnst) max_constraints++; 3729 3730 /* 3731 Evaluate maximum storage size needed by the procedure 3732 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3733 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3734 There can be multiple constraints per connected component 3735 */ 3736 n_vertices = 0; 3737 if (ISForVertices) { 3738 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3739 } 3740 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3741 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3742 3743 total_counts = n_ISForFaces+n_ISForEdges; 3744 total_counts *= max_constraints; 3745 total_counts += n_vertices; 3746 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3747 3748 total_counts = 0; 3749 max_size_of_constraint = 0; 3750 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3751 IS used_is; 3752 if (i<n_ISForEdges) { 3753 used_is = ISForEdges[i]; 3754 } else { 3755 used_is = ISForFaces[i-n_ISForEdges]; 3756 } 3757 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3758 total_counts += j; 3759 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3760 } 3761 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3762 3763 /* get local part of global near null space vectors */ 3764 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3765 for (k=0;k<nnsp_size;k++) { 3766 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3767 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3768 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3769 } 3770 3771 /* whether or not to skip lapack calls */ 3772 skip_lapack = PETSC_TRUE; 3773 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3774 3775 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3776 if (!skip_lapack) { 3777 PetscScalar temp_work; 3778 3779 #if defined(PETSC_MISSING_LAPACK_GESVD) 3780 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3781 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3782 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3783 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3784 #if defined(PETSC_USE_COMPLEX) 3785 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3786 #endif 3787 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3788 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3789 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3790 lwork = -1; 3791 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3792 #if !defined(PETSC_USE_COMPLEX) 3793 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3794 #else 3795 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3796 #endif 3797 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3798 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3799 #else /* on missing GESVD */ 3800 /* SVD */ 3801 PetscInt max_n,min_n; 3802 max_n = max_size_of_constraint; 3803 min_n = max_constraints; 3804 if (max_size_of_constraint < max_constraints) { 3805 min_n = max_size_of_constraint; 3806 max_n = max_constraints; 3807 } 3808 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3809 #if defined(PETSC_USE_COMPLEX) 3810 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3811 #endif 3812 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3813 lwork = -1; 3814 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3815 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3816 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3818 #if !defined(PETSC_USE_COMPLEX) 3819 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3820 #else 3821 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3822 #endif 3823 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3824 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3825 #endif /* on missing GESVD */ 3826 /* Allocate optimal workspace */ 3827 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3828 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3829 } 3830 /* Now we can loop on constraining sets */ 3831 total_counts = 0; 3832 constraints_idxs_ptr[0] = 0; 3833 constraints_data_ptr[0] = 0; 3834 /* vertices */ 3835 if (n_vertices) { 3836 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3837 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3838 for (i=0;i<n_vertices;i++) { 3839 constraints_n[total_counts] = 1; 3840 constraints_data[total_counts] = 1.0; 3841 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3842 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3843 total_counts++; 3844 } 3845 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3846 n_vertices = total_counts; 3847 } 3848 3849 /* edges and faces */ 3850 total_counts_cc = total_counts; 3851 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3852 IS used_is; 3853 PetscBool idxs_copied = PETSC_FALSE; 3854 3855 if (ncc<n_ISForEdges) { 3856 used_is = ISForEdges[ncc]; 3857 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3858 } else { 3859 used_is = ISForFaces[ncc-n_ISForEdges]; 3860 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3861 } 3862 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3863 3864 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3865 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3866 /* change of basis should not be performed on local periodic nodes */ 3867 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3868 if (nnsp_has_cnst) { 3869 PetscScalar quad_value; 3870 3871 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3872 idxs_copied = PETSC_TRUE; 3873 3874 if (!pcbddc->use_nnsp_true) { 3875 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3876 } else { 3877 quad_value = 1.0; 3878 } 3879 for (j=0;j<size_of_constraint;j++) { 3880 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3881 } 3882 temp_constraints++; 3883 total_counts++; 3884 } 3885 for (k=0;k<nnsp_size;k++) { 3886 PetscReal real_value; 3887 PetscScalar *ptr_to_data; 3888 3889 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3890 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3891 for (j=0;j<size_of_constraint;j++) { 3892 ptr_to_data[j] = array[is_indices[j]]; 3893 } 3894 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3895 /* check if array is null on the connected component */ 3896 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3897 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3898 if (real_value > 0.0) { /* keep indices and values */ 3899 temp_constraints++; 3900 total_counts++; 3901 if (!idxs_copied) { 3902 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3903 idxs_copied = PETSC_TRUE; 3904 } 3905 } 3906 } 3907 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3908 valid_constraints = temp_constraints; 3909 if (!pcbddc->use_nnsp_true && temp_constraints) { 3910 if (temp_constraints == 1) { /* just normalize the constraint */ 3911 PetscScalar norm,*ptr_to_data; 3912 3913 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3914 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3915 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3916 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3917 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3918 } else { /* perform SVD */ 3919 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3920 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3921 3922 #if defined(PETSC_MISSING_LAPACK_GESVD) 3923 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3924 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3925 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3926 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3927 from that computed using LAPACKgesvd 3928 -> This is due to a different computation of eigenvectors in LAPACKheev 3929 -> The quality of the POD-computed basis will be the same */ 3930 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3931 /* Store upper triangular part of correlation matrix */ 3932 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3933 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3934 for (j=0;j<temp_constraints;j++) { 3935 for (k=0;k<j+1;k++) { 3936 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3937 } 3938 } 3939 /* compute eigenvalues and eigenvectors of correlation matrix */ 3940 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3941 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3942 #if !defined(PETSC_USE_COMPLEX) 3943 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3944 #else 3945 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3946 #endif 3947 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3949 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3950 j = 0; 3951 while (j < temp_constraints && singular_vals[j] < tol) j++; 3952 total_counts = total_counts-j; 3953 valid_constraints = temp_constraints-j; 3954 /* scale and copy POD basis into used quadrature memory */ 3955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3959 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3961 if (j<temp_constraints) { 3962 PetscInt ii; 3963 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3964 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3965 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3967 for (k=0;k<temp_constraints-j;k++) { 3968 for (ii=0;ii<size_of_constraint;ii++) { 3969 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3970 } 3971 } 3972 } 3973 #else /* on missing GESVD */ 3974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3975 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3978 #if !defined(PETSC_USE_COMPLEX) 3979 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3980 #else 3981 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3982 #endif 3983 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3984 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3985 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3986 k = temp_constraints; 3987 if (k > size_of_constraint) k = size_of_constraint; 3988 j = 0; 3989 while (j < k && singular_vals[k-j-1] < tol) j++; 3990 valid_constraints = k-j; 3991 total_counts = total_counts-temp_constraints+valid_constraints; 3992 #endif /* on missing GESVD */ 3993 } 3994 } 3995 /* update pointers information */ 3996 if (valid_constraints) { 3997 constraints_n[total_counts_cc] = valid_constraints; 3998 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3999 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4000 /* set change_of_basis flag */ 4001 if (boolforchange) { 4002 PetscBTSet(change_basis,total_counts_cc); 4003 } 4004 total_counts_cc++; 4005 } 4006 } 4007 /* free workspace */ 4008 if (!skip_lapack) { 4009 ierr = PetscFree(work);CHKERRQ(ierr); 4010 #if defined(PETSC_USE_COMPLEX) 4011 ierr = PetscFree(rwork);CHKERRQ(ierr); 4012 #endif 4013 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4014 #if defined(PETSC_MISSING_LAPACK_GESVD) 4015 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4016 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4017 #endif 4018 } 4019 for (k=0;k<nnsp_size;k++) { 4020 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4021 } 4022 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4023 /* free index sets of faces, edges and vertices */ 4024 for (i=0;i<n_ISForFaces;i++) { 4025 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4026 } 4027 if (n_ISForFaces) { 4028 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4029 } 4030 for (i=0;i<n_ISForEdges;i++) { 4031 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4032 } 4033 if (n_ISForEdges) { 4034 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4035 } 4036 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4037 } else { 4038 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4039 4040 total_counts = 0; 4041 n_vertices = 0; 4042 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4043 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4044 } 4045 max_constraints = 0; 4046 total_counts_cc = 0; 4047 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4048 total_counts += pcbddc->adaptive_constraints_n[i]; 4049 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4050 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4051 } 4052 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4053 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4054 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4055 constraints_data = pcbddc->adaptive_constraints_data; 4056 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4057 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4058 total_counts_cc = 0; 4059 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4060 if (pcbddc->adaptive_constraints_n[i]) { 4061 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4062 } 4063 } 4064 #if 0 4065 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4066 for (i=0;i<total_counts_cc;i++) { 4067 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4068 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4069 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4070 printf(" %d",constraints_idxs[j]); 4071 } 4072 printf("\n"); 4073 printf("number of cc: %d\n",constraints_n[i]); 4074 } 4075 for (i=0;i<n_vertices;i++) { 4076 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4077 } 4078 for (i=0;i<sub_schurs->n_subs;i++) { 4079 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4080 } 4081 #endif 4082 4083 max_size_of_constraint = 0; 4084 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4085 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4086 /* Change of basis */ 4087 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4088 if (pcbddc->use_change_of_basis) { 4089 for (i=0;i<sub_schurs->n_subs;i++) { 4090 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4091 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4092 } 4093 } 4094 } 4095 } 4096 pcbddc->local_primal_size = total_counts; 4097 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4098 4099 /* map constraints_idxs in boundary numbering */ 4100 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4101 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4102 4103 /* Create constraint matrix */ 4104 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4105 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4106 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4107 4108 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4109 /* determine if a QR strategy is needed for change of basis */ 4110 qr_needed = PETSC_FALSE; 4111 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4112 total_primal_vertices=0; 4113 pcbddc->local_primal_size_cc = 0; 4114 for (i=0;i<total_counts_cc;i++) { 4115 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4116 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4117 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4118 pcbddc->local_primal_size_cc += 1; 4119 } else if (PetscBTLookup(change_basis,i)) { 4120 for (k=0;k<constraints_n[i];k++) { 4121 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4122 } 4123 pcbddc->local_primal_size_cc += constraints_n[i]; 4124 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4125 PetscBTSet(qr_needed_idx,i); 4126 qr_needed = PETSC_TRUE; 4127 } 4128 } else { 4129 pcbddc->local_primal_size_cc += 1; 4130 } 4131 } 4132 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4133 pcbddc->n_vertices = total_primal_vertices; 4134 /* permute indices in order to have a sorted set of vertices */ 4135 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4136 4137 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4138 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4139 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4140 4141 /* nonzero structure of constraint matrix */ 4142 /* and get reference dof for local constraints */ 4143 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4144 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4145 4146 j = total_primal_vertices; 4147 total_counts = total_primal_vertices; 4148 cum = total_primal_vertices; 4149 for (i=n_vertices;i<total_counts_cc;i++) { 4150 if (!PetscBTLookup(change_basis,i)) { 4151 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4152 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4153 cum++; 4154 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4155 for (k=0;k<constraints_n[i];k++) { 4156 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4157 nnz[j+k] = size_of_constraint; 4158 } 4159 j += constraints_n[i]; 4160 } 4161 } 4162 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4163 ierr = PetscFree(nnz);CHKERRQ(ierr); 4164 4165 /* set values in constraint matrix */ 4166 for (i=0;i<total_primal_vertices;i++) { 4167 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4168 } 4169 total_counts = total_primal_vertices; 4170 for (i=n_vertices;i<total_counts_cc;i++) { 4171 if (!PetscBTLookup(change_basis,i)) { 4172 PetscInt *cols; 4173 4174 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4175 cols = constraints_idxs+constraints_idxs_ptr[i]; 4176 for (k=0;k<constraints_n[i];k++) { 4177 PetscInt row = total_counts+k; 4178 PetscScalar *vals; 4179 4180 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4181 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4182 } 4183 total_counts += constraints_n[i]; 4184 } 4185 } 4186 /* assembling */ 4187 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4188 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4189 4190 /* 4191 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4192 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4193 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4194 */ 4195 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4196 if (pcbddc->use_change_of_basis) { 4197 /* dual and primal dofs on a single cc */ 4198 PetscInt dual_dofs,primal_dofs; 4199 /* working stuff for GEQRF */ 4200 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4201 PetscBLASInt lqr_work; 4202 /* working stuff for UNGQR */ 4203 PetscScalar *gqr_work,lgqr_work_t; 4204 PetscBLASInt lgqr_work; 4205 /* working stuff for TRTRS */ 4206 PetscScalar *trs_rhs; 4207 PetscBLASInt Blas_NRHS; 4208 /* pointers for values insertion into change of basis matrix */ 4209 PetscInt *start_rows,*start_cols; 4210 PetscScalar *start_vals; 4211 /* working stuff for values insertion */ 4212 PetscBT is_primal; 4213 PetscInt *aux_primal_numbering_B; 4214 /* matrix sizes */ 4215 PetscInt global_size,local_size; 4216 /* temporary change of basis */ 4217 Mat localChangeOfBasisMatrix; 4218 /* extra space for debugging */ 4219 PetscScalar *dbg_work; 4220 4221 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4222 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4223 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4224 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4225 /* nonzeros for local mat */ 4226 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4227 if (!pcbddc->benign_change || pcbddc->fake_change) { 4228 for (i=0;i<pcis->n;i++) nnz[i]=1; 4229 } else { 4230 const PetscInt *ii; 4231 PetscInt n; 4232 PetscBool flg_row; 4233 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4234 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4235 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4236 } 4237 for (i=n_vertices;i<total_counts_cc;i++) { 4238 if (PetscBTLookup(change_basis,i)) { 4239 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4240 if (PetscBTLookup(qr_needed_idx,i)) { 4241 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4242 } else { 4243 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4244 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4245 } 4246 } 4247 } 4248 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4249 ierr = PetscFree(nnz);CHKERRQ(ierr); 4250 /* Set interior change in the matrix */ 4251 if (!pcbddc->benign_change || pcbddc->fake_change) { 4252 for (i=0;i<pcis->n;i++) { 4253 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4254 } 4255 } else { 4256 const PetscInt *ii,*jj; 4257 PetscScalar *aa; 4258 PetscInt n; 4259 PetscBool flg_row; 4260 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4261 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4262 for (i=0;i<n;i++) { 4263 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4264 } 4265 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4266 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4267 } 4268 4269 if (pcbddc->dbg_flag) { 4270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4272 } 4273 4274 4275 /* Now we loop on the constraints which need a change of basis */ 4276 /* 4277 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4278 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4279 4280 Basic blocks of change of basis matrix T computed by 4281 4282 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4283 4284 | 1 0 ... 0 s_1/S | 4285 | 0 1 ... 0 s_2/S | 4286 | ... | 4287 | 0 ... 1 s_{n-1}/S | 4288 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4289 4290 with S = \sum_{i=1}^n s_i^2 4291 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4292 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4293 4294 - QR decomposition of constraints otherwise 4295 */ 4296 if (qr_needed) { 4297 /* space to store Q */ 4298 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4299 /* first we issue queries for optimal work */ 4300 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4301 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4302 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4303 lqr_work = -1; 4304 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4305 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4306 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4307 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4308 lgqr_work = -1; 4309 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4310 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4311 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4312 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4313 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4314 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4315 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4316 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4317 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4318 /* array to store scaling factors for reflectors */ 4319 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4320 /* array to store rhs and solution of triangular solver */ 4321 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4322 /* allocating workspace for check */ 4323 if (pcbddc->dbg_flag) { 4324 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4325 } 4326 } 4327 /* array to store whether a node is primal or not */ 4328 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4329 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4330 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4331 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4332 for (i=0;i<total_primal_vertices;i++) { 4333 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4334 } 4335 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4336 4337 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4338 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4339 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4340 if (PetscBTLookup(change_basis,total_counts)) { 4341 /* get constraint info */ 4342 primal_dofs = constraints_n[total_counts]; 4343 dual_dofs = size_of_constraint-primal_dofs; 4344 4345 if (pcbddc->dbg_flag) { 4346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4347 } 4348 4349 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4350 4351 /* copy quadrature constraints for change of basis check */ 4352 if (pcbddc->dbg_flag) { 4353 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4354 } 4355 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4356 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4357 4358 /* compute QR decomposition of constraints */ 4359 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4360 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4361 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4362 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4363 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4364 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4365 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4366 4367 /* explictly compute R^-T */ 4368 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4369 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4370 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4371 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4372 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4373 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4374 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4375 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4376 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4377 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4378 4379 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4383 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4384 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4385 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4386 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4388 4389 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4390 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4391 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4392 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4393 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4396 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4397 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4398 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4399 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4401 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4402 4403 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4404 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4405 /* insert cols for primal dofs */ 4406 for (j=0;j<primal_dofs;j++) { 4407 start_vals = &qr_basis[j*size_of_constraint]; 4408 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4409 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4410 } 4411 /* insert cols for dual dofs */ 4412 for (j=0,k=0;j<dual_dofs;k++) { 4413 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4414 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4415 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4416 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4417 j++; 4418 } 4419 } 4420 4421 /* check change of basis */ 4422 if (pcbddc->dbg_flag) { 4423 PetscInt ii,jj; 4424 PetscBool valid_qr=PETSC_TRUE; 4425 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4427 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4428 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4430 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4432 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4433 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4434 for (jj=0;jj<size_of_constraint;jj++) { 4435 for (ii=0;ii<primal_dofs;ii++) { 4436 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4437 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4438 } 4439 } 4440 if (!valid_qr) { 4441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4442 for (jj=0;jj<size_of_constraint;jj++) { 4443 for (ii=0;ii<primal_dofs;ii++) { 4444 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4445 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4446 } 4447 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4448 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4449 } 4450 } 4451 } 4452 } else { 4453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4454 } 4455 } 4456 } else { /* simple transformation block */ 4457 PetscInt row,col; 4458 PetscScalar val,norm; 4459 4460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4461 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4462 for (j=0;j<size_of_constraint;j++) { 4463 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4464 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4465 if (!PetscBTLookup(is_primal,row_B)) { 4466 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4467 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4468 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4469 } else { 4470 for (k=0;k<size_of_constraint;k++) { 4471 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4472 if (row != col) { 4473 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4474 } else { 4475 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4476 } 4477 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4478 } 4479 } 4480 } 4481 if (pcbddc->dbg_flag) { 4482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4483 } 4484 } 4485 } else { 4486 if (pcbddc->dbg_flag) { 4487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4488 } 4489 } 4490 } 4491 4492 /* free workspace */ 4493 if (qr_needed) { 4494 if (pcbddc->dbg_flag) { 4495 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4496 } 4497 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4498 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4499 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4500 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4501 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4502 } 4503 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4504 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4505 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4506 4507 /* assembling of global change of variable */ 4508 if (!pcbddc->fake_change) { 4509 Mat tmat; 4510 PetscInt bs; 4511 4512 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4513 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4514 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4515 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4516 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4517 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4518 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4519 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4520 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4521 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4522 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4523 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4524 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4525 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4526 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4527 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4528 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4529 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4530 4531 /* check */ 4532 if (pcbddc->dbg_flag) { 4533 PetscReal error; 4534 Vec x,x_change; 4535 4536 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4537 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4538 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4539 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4540 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4542 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4543 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4544 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4545 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4546 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4547 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4550 ierr = VecDestroy(&x);CHKERRQ(ierr); 4551 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4552 } 4553 /* adapt sub_schurs computed (if any) */ 4554 if (pcbddc->use_deluxe_scaling) { 4555 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4556 4557 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4558 if (sub_schurs && sub_schurs->S_Ej_all) { 4559 Mat S_new,tmat; 4560 IS is_all_N,is_V_Sall = NULL; 4561 4562 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4563 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4564 if (pcbddc->deluxe_zerorows) { 4565 ISLocalToGlobalMapping NtoSall; 4566 IS is_V; 4567 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4568 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4569 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4570 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4571 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4572 } 4573 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4574 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4575 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4576 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4577 if (pcbddc->deluxe_zerorows) { 4578 const PetscScalar *array; 4579 const PetscInt *idxs_V,*idxs_all; 4580 PetscInt i,n_V; 4581 4582 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4583 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4584 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4585 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4586 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4587 for (i=0;i<n_V;i++) { 4588 PetscScalar val; 4589 PetscInt idx; 4590 4591 idx = idxs_V[i]; 4592 val = array[idxs_all[idxs_V[i]]]; 4593 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4594 } 4595 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4598 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4599 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4600 } 4601 sub_schurs->S_Ej_all = S_new; 4602 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4603 if (sub_schurs->sum_S_Ej_all) { 4604 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4605 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4606 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4607 if (pcbddc->deluxe_zerorows) { 4608 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4609 } 4610 sub_schurs->sum_S_Ej_all = S_new; 4611 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4612 } 4613 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4614 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4615 } 4616 /* destroy any change of basis context in sub_schurs */ 4617 if (sub_schurs && sub_schurs->change) { 4618 PetscInt i; 4619 4620 for (i=0;i<sub_schurs->n_subs;i++) { 4621 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4622 } 4623 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4624 } 4625 } 4626 if (pcbddc->switch_static) { /* need to save the local change */ 4627 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4628 } else { 4629 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4630 } 4631 /* determine if any process has changed the pressures locally */ 4632 pcbddc->change_interior = pcbddc->benign_have_null; 4633 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4634 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4635 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4636 pcbddc->use_qr_single = qr_needed; 4637 } 4638 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4639 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4640 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4641 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4642 } else { 4643 Mat benign_global = NULL; 4644 if (pcbddc->benign_have_null) { 4645 Mat tmat; 4646 4647 pcbddc->change_interior = PETSC_TRUE; 4648 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4649 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4650 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4651 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4652 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4653 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4654 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4655 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4656 if (pcbddc->benign_change) { 4657 Mat M; 4658 4659 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4660 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4661 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4662 ierr = MatDestroy(&M);CHKERRQ(ierr); 4663 } else { 4664 Mat eye; 4665 PetscScalar *array; 4666 4667 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4668 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4669 for (i=0;i<pcis->n;i++) { 4670 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4671 } 4672 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4673 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4676 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4677 } 4678 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4679 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4680 } 4681 if (pcbddc->user_ChangeOfBasisMatrix) { 4682 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4683 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4684 } else if (pcbddc->benign_have_null) { 4685 pcbddc->ChangeOfBasisMatrix = benign_global; 4686 } 4687 } 4688 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4689 IS is_global; 4690 const PetscInt *gidxs; 4691 4692 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4693 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4694 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4695 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4696 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4697 } 4698 } 4699 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4700 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4701 } 4702 4703 if (!pcbddc->fake_change) { 4704 /* add pressure dofs to set of primal nodes for numbering purposes */ 4705 for (i=0;i<pcbddc->benign_n;i++) { 4706 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4707 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4708 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4709 pcbddc->local_primal_size_cc++; 4710 pcbddc->local_primal_size++; 4711 } 4712 4713 /* check if a new primal space has been introduced (also take into account benign trick) */ 4714 pcbddc->new_primal_space_local = PETSC_TRUE; 4715 if (olocal_primal_size == pcbddc->local_primal_size) { 4716 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4717 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4718 if (!pcbddc->new_primal_space_local) { 4719 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4720 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4721 } 4722 } 4723 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4724 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4725 } 4726 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4727 4728 /* flush dbg viewer */ 4729 if (pcbddc->dbg_flag) { 4730 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4731 } 4732 4733 /* free workspace */ 4734 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4735 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4736 if (!pcbddc->adaptive_selection) { 4737 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4738 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4739 } else { 4740 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4741 pcbddc->adaptive_constraints_idxs_ptr, 4742 pcbddc->adaptive_constraints_data_ptr, 4743 pcbddc->adaptive_constraints_idxs, 4744 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4745 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4746 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4747 } 4748 PetscFunctionReturn(0); 4749 } 4750 4751 #undef __FUNCT__ 4752 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4753 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4754 { 4755 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4756 PC_IS *pcis = (PC_IS*)pc->data; 4757 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4758 PetscInt ierr,i,N; 4759 4760 PetscFunctionBegin; 4761 /* Reset previously computed graph */ 4762 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4763 /* Init local Graph struct */ 4764 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4765 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4766 4767 /* Check validity of the csr graph passed in by the user */ 4768 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4769 4770 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4771 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4772 PetscInt *xadj,*adjncy; 4773 PetscInt nvtxs; 4774 PetscBool flg_row=PETSC_FALSE; 4775 4776 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4777 if (flg_row) { 4778 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4779 pcbddc->computed_rowadj = PETSC_TRUE; 4780 } 4781 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4782 } 4783 if (pcbddc->dbg_flag) { 4784 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4785 } 4786 4787 /* Setup of Graph */ 4788 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4789 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4790 4791 /* attach info on disconnected subdomains if present */ 4792 if (pcbddc->n_local_subs) { 4793 PetscInt *local_subs; 4794 4795 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4796 for (i=0;i<pcbddc->n_local_subs;i++) { 4797 const PetscInt *idxs; 4798 PetscInt nl,j; 4799 4800 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4801 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4802 for (j=0;j<nl;j++) { 4803 local_subs[idxs[j]] = i; 4804 } 4805 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4806 } 4807 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4808 pcbddc->mat_graph->local_subs = local_subs; 4809 } 4810 4811 /* Graph's connected components analysis */ 4812 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4813 PetscFunctionReturn(0); 4814 } 4815 4816 #undef __FUNCT__ 4817 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4818 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4819 { 4820 PetscInt i,j; 4821 PetscScalar *alphas; 4822 PetscErrorCode ierr; 4823 4824 PetscFunctionBegin; 4825 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4826 for (i=0;i<n;i++) { 4827 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4828 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4829 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4830 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4831 } 4832 ierr = PetscFree(alphas);CHKERRQ(ierr); 4833 PetscFunctionReturn(0); 4834 } 4835 4836 #undef __FUNCT__ 4837 #define __FUNCT__ "MatISGetSubassemblingPattern" 4838 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4839 { 4840 Mat A; 4841 PetscInt n_neighs,*neighs,*n_shared,**shared; 4842 PetscMPIInt size,rank,color; 4843 PetscInt *xadj,*adjncy; 4844 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4845 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4846 PetscInt void_procs,*procs_candidates = NULL; 4847 PetscInt xadj_count, *count; 4848 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4849 PetscSubcomm psubcomm; 4850 MPI_Comm subcomm; 4851 PetscErrorCode ierr; 4852 4853 PetscFunctionBegin; 4854 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4855 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4856 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4857 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4858 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4859 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4860 4861 if (have_void) *have_void = PETSC_FALSE; 4862 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4863 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4864 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4865 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4866 im_active = !!(n); 4867 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4868 void_procs = size - active_procs; 4869 /* get ranks of of non-active processes in mat communicator */ 4870 if (void_procs) { 4871 PetscInt ncand; 4872 4873 if (have_void) *have_void = PETSC_TRUE; 4874 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4875 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4876 for (i=0,ncand=0;i<size;i++) { 4877 if (!procs_candidates[i]) { 4878 procs_candidates[ncand++] = i; 4879 } 4880 } 4881 /* force n_subdomains to be not greater that the number of non-active processes */ 4882 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4883 } 4884 4885 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4886 if (active_procs < *n_subdomains) { 4887 PetscInt issize,isidx; 4888 if (im_active) { 4889 issize = 1; 4890 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4891 isidx = procs_candidates[rank]; 4892 } else { 4893 isidx = rank; 4894 } 4895 } else { 4896 issize = 0; 4897 isidx = -1; 4898 } 4899 *n_subdomains = active_procs; 4900 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4901 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4902 PetscFunctionReturn(0); 4903 } 4904 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4905 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4906 threshold = PetscMax(threshold,2); 4907 4908 /* Get info on mapping */ 4909 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4910 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4911 4912 /* build local CSR graph of subdomains' connectivity */ 4913 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4914 xadj[0] = 0; 4915 xadj[1] = PetscMax(n_neighs-1,0); 4916 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4917 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4918 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4919 for (i=1;i<n_neighs;i++) 4920 for (j=0;j<n_shared[i];j++) 4921 count[shared[i][j]] += 1; 4922 4923 xadj_count = 0; 4924 for (i=1;i<n_neighs;i++) { 4925 for (j=0;j<n_shared[i];j++) { 4926 if (count[shared[i][j]] < threshold) { 4927 adjncy[xadj_count] = neighs[i]; 4928 adjncy_wgt[xadj_count] = n_shared[i]; 4929 xadj_count++; 4930 break; 4931 } 4932 } 4933 } 4934 xadj[1] = xadj_count; 4935 ierr = PetscFree(count);CHKERRQ(ierr); 4936 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4937 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4938 4939 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4940 4941 /* Restrict work on active processes only */ 4942 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4943 if (void_procs) { 4944 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4945 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4946 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4947 subcomm = PetscSubcommChild(psubcomm); 4948 } else { 4949 psubcomm = NULL; 4950 subcomm = PetscObjectComm((PetscObject)mat); 4951 } 4952 4953 v_wgt = NULL; 4954 if (!color) { 4955 ierr = PetscFree(xadj);CHKERRQ(ierr); 4956 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4957 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4958 } else { 4959 Mat subdomain_adj; 4960 IS new_ranks,new_ranks_contig; 4961 MatPartitioning partitioner; 4962 PetscInt rstart=0,rend=0; 4963 PetscInt *is_indices,*oldranks; 4964 PetscMPIInt size; 4965 PetscBool aggregate; 4966 4967 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4968 if (void_procs) { 4969 PetscInt prank = rank; 4970 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4971 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4972 for (i=0;i<xadj[1];i++) { 4973 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4974 } 4975 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4976 } else { 4977 oldranks = NULL; 4978 } 4979 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4980 if (aggregate) { /* TODO: all this part could be made more efficient */ 4981 PetscInt lrows,row,ncols,*cols; 4982 PetscMPIInt nrank; 4983 PetscScalar *vals; 4984 4985 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 4986 lrows = 0; 4987 if (nrank<redprocs) { 4988 lrows = size/redprocs; 4989 if (nrank<size%redprocs) lrows++; 4990 } 4991 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4992 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4993 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4994 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4995 row = nrank; 4996 ncols = xadj[1]-xadj[0]; 4997 cols = adjncy; 4998 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4999 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5000 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5001 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5002 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5003 ierr = PetscFree(xadj);CHKERRQ(ierr); 5004 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5005 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5006 ierr = PetscFree(vals);CHKERRQ(ierr); 5007 if (use_vwgt) { 5008 Vec v; 5009 const PetscScalar *array; 5010 PetscInt nl; 5011 5012 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5013 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5014 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5015 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5016 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5017 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5018 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5019 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5020 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5021 ierr = VecDestroy(&v);CHKERRQ(ierr); 5022 } 5023 } else { 5024 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5025 if (use_vwgt) { 5026 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5027 v_wgt[0] = local_size; 5028 } 5029 } 5030 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5031 5032 /* Partition */ 5033 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5034 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5035 if (v_wgt) { 5036 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5037 } 5038 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5039 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5040 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5041 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5042 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5043 5044 /* renumber new_ranks to avoid "holes" in new set of processors */ 5045 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5046 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5047 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5048 if (!aggregate) { 5049 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5050 #if defined(PETSC_USE_DEBUG) 5051 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5052 #endif 5053 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5054 } else if (oldranks) { 5055 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5056 } else { 5057 ranks_send_to_idx[0] = is_indices[0]; 5058 } 5059 } else { 5060 PetscInt idxs[1]; 5061 PetscMPIInt tag; 5062 MPI_Request *reqs; 5063 5064 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5065 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5066 for (i=rstart;i<rend;i++) { 5067 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5068 } 5069 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5070 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5071 ierr = PetscFree(reqs);CHKERRQ(ierr); 5072 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5073 #if defined(PETSC_USE_DEBUG) 5074 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5075 #endif 5076 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5077 } else if (oldranks) { 5078 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5079 } else { 5080 ranks_send_to_idx[0] = idxs[0]; 5081 } 5082 } 5083 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5084 /* clean up */ 5085 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5086 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5087 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5088 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5089 } 5090 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5091 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5092 5093 /* assemble parallel IS for sends */ 5094 i = 1; 5095 if (!color) i=0; 5096 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5101 5102 #undef __FUNCT__ 5103 #define __FUNCT__ "MatISSubassemble" 5104 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5105 { 5106 Mat local_mat; 5107 IS is_sends_internal; 5108 PetscInt rows,cols,new_local_rows; 5109 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5110 PetscBool ismatis,isdense,newisdense,destroy_mat; 5111 ISLocalToGlobalMapping l2gmap; 5112 PetscInt* l2gmap_indices; 5113 const PetscInt* is_indices; 5114 MatType new_local_type; 5115 /* buffers */ 5116 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5117 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5118 PetscInt *recv_buffer_idxs_local; 5119 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5120 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5121 /* MPI */ 5122 MPI_Comm comm,comm_n; 5123 PetscSubcomm subcomm; 5124 PetscMPIInt n_sends,n_recvs,commsize; 5125 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5126 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5127 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5128 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5129 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5130 PetscErrorCode ierr; 5131 5132 PetscFunctionBegin; 5133 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5134 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5135 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5136 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5137 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5138 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5139 PetscValidLogicalCollectiveBool(mat,reuse,6); 5140 PetscValidLogicalCollectiveInt(mat,nis,8); 5141 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5142 if (nvecs) { 5143 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5144 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5145 } 5146 /* further checks */ 5147 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5148 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5149 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5150 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5151 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5152 if (reuse && *mat_n) { 5153 PetscInt mrows,mcols,mnrows,mncols; 5154 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5155 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5156 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5157 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5158 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5159 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5160 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5161 } 5162 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5163 PetscValidLogicalCollectiveInt(mat,bs,0); 5164 5165 /* prepare IS for sending if not provided */ 5166 if (!is_sends) { 5167 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5168 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5169 } else { 5170 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5171 is_sends_internal = is_sends; 5172 } 5173 5174 /* get comm */ 5175 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5176 5177 /* compute number of sends */ 5178 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5179 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5180 5181 /* compute number of receives */ 5182 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5183 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5184 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5185 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5186 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5187 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5188 ierr = PetscFree(iflags);CHKERRQ(ierr); 5189 5190 /* restrict comm if requested */ 5191 subcomm = 0; 5192 destroy_mat = PETSC_FALSE; 5193 if (restrict_comm) { 5194 PetscMPIInt color,subcommsize; 5195 5196 color = 0; 5197 if (restrict_full) { 5198 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5199 } else { 5200 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5201 } 5202 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5203 subcommsize = commsize - subcommsize; 5204 /* check if reuse has been requested */ 5205 if (reuse) { 5206 if (*mat_n) { 5207 PetscMPIInt subcommsize2; 5208 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5209 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5210 comm_n = PetscObjectComm((PetscObject)*mat_n); 5211 } else { 5212 comm_n = PETSC_COMM_SELF; 5213 } 5214 } else { /* MAT_INITIAL_MATRIX */ 5215 PetscMPIInt rank; 5216 5217 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5218 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5219 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5220 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5221 comm_n = PetscSubcommChild(subcomm); 5222 } 5223 /* flag to destroy *mat_n if not significative */ 5224 if (color) destroy_mat = PETSC_TRUE; 5225 } else { 5226 comm_n = comm; 5227 } 5228 5229 /* prepare send/receive buffers */ 5230 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5231 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5232 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5233 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5234 if (nis) { 5235 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5236 } 5237 5238 /* Get data from local matrices */ 5239 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5240 /* TODO: See below some guidelines on how to prepare the local buffers */ 5241 /* 5242 send_buffer_vals should contain the raw values of the local matrix 5243 send_buffer_idxs should contain: 5244 - MatType_PRIVATE type 5245 - PetscInt size_of_l2gmap 5246 - PetscInt global_row_indices[size_of_l2gmap] 5247 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5248 */ 5249 else { 5250 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5251 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5252 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5253 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5254 send_buffer_idxs[1] = i; 5255 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5256 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5257 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5258 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5259 for (i=0;i<n_sends;i++) { 5260 ilengths_vals[is_indices[i]] = len*len; 5261 ilengths_idxs[is_indices[i]] = len+2; 5262 } 5263 } 5264 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5265 /* additional is (if any) */ 5266 if (nis) { 5267 PetscMPIInt psum; 5268 PetscInt j; 5269 for (j=0,psum=0;j<nis;j++) { 5270 PetscInt plen; 5271 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5272 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5273 psum += len+1; /* indices + lenght */ 5274 } 5275 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5276 for (j=0,psum=0;j<nis;j++) { 5277 PetscInt plen; 5278 const PetscInt *is_array_idxs; 5279 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5280 send_buffer_idxs_is[psum] = plen; 5281 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5282 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5283 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5284 psum += plen+1; /* indices + lenght */ 5285 } 5286 for (i=0;i<n_sends;i++) { 5287 ilengths_idxs_is[is_indices[i]] = psum; 5288 } 5289 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5290 } 5291 5292 buf_size_idxs = 0; 5293 buf_size_vals = 0; 5294 buf_size_idxs_is = 0; 5295 buf_size_vecs = 0; 5296 for (i=0;i<n_recvs;i++) { 5297 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5298 buf_size_vals += (PetscInt)olengths_vals[i]; 5299 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5300 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5301 } 5302 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5303 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5304 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5305 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5306 5307 /* get new tags for clean communications */ 5308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5309 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5310 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5311 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5312 5313 /* allocate for requests */ 5314 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5315 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5316 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5317 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5318 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5319 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5320 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5321 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5322 5323 /* communications */ 5324 ptr_idxs = recv_buffer_idxs; 5325 ptr_vals = recv_buffer_vals; 5326 ptr_idxs_is = recv_buffer_idxs_is; 5327 ptr_vecs = recv_buffer_vecs; 5328 for (i=0;i<n_recvs;i++) { 5329 source_dest = onodes[i]; 5330 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5331 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5332 ptr_idxs += olengths_idxs[i]; 5333 ptr_vals += olengths_vals[i]; 5334 if (nis) { 5335 source_dest = onodes_is[i]; 5336 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5337 ptr_idxs_is += olengths_idxs_is[i]; 5338 } 5339 if (nvecs) { 5340 source_dest = onodes[i]; 5341 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5342 ptr_vecs += olengths_idxs[i]-2; 5343 } 5344 } 5345 for (i=0;i<n_sends;i++) { 5346 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5347 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5348 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5349 if (nis) { 5350 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5351 } 5352 if (nvecs) { 5353 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5354 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5355 } 5356 } 5357 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5358 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5359 5360 /* assemble new l2g map */ 5361 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5362 ptr_idxs = recv_buffer_idxs; 5363 new_local_rows = 0; 5364 for (i=0;i<n_recvs;i++) { 5365 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5366 ptr_idxs += olengths_idxs[i]; 5367 } 5368 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5369 ptr_idxs = recv_buffer_idxs; 5370 new_local_rows = 0; 5371 for (i=0;i<n_recvs;i++) { 5372 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5373 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5374 ptr_idxs += olengths_idxs[i]; 5375 } 5376 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5377 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5378 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5379 5380 /* infer new local matrix type from received local matrices type */ 5381 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5382 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5383 if (n_recvs) { 5384 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5385 ptr_idxs = recv_buffer_idxs; 5386 for (i=0;i<n_recvs;i++) { 5387 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5388 new_local_type_private = MATAIJ_PRIVATE; 5389 break; 5390 } 5391 ptr_idxs += olengths_idxs[i]; 5392 } 5393 switch (new_local_type_private) { 5394 case MATDENSE_PRIVATE: 5395 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5396 new_local_type = MATSEQAIJ; 5397 bs = 1; 5398 } else { /* if I receive only 1 dense matrix */ 5399 new_local_type = MATSEQDENSE; 5400 bs = 1; 5401 } 5402 break; 5403 case MATAIJ_PRIVATE: 5404 new_local_type = MATSEQAIJ; 5405 bs = 1; 5406 break; 5407 case MATBAIJ_PRIVATE: 5408 new_local_type = MATSEQBAIJ; 5409 break; 5410 case MATSBAIJ_PRIVATE: 5411 new_local_type = MATSEQSBAIJ; 5412 break; 5413 default: 5414 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5415 break; 5416 } 5417 } else { /* by default, new_local_type is seqdense */ 5418 new_local_type = MATSEQDENSE; 5419 bs = 1; 5420 } 5421 5422 /* create MATIS object if needed */ 5423 if (!reuse) { 5424 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5425 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5426 } else { 5427 /* it also destroys the local matrices */ 5428 if (*mat_n) { 5429 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5430 } else { /* this is a fake object */ 5431 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5432 } 5433 } 5434 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5435 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5436 5437 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5438 5439 /* Global to local map of received indices */ 5440 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5441 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5442 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5443 5444 /* restore attributes -> type of incoming data and its size */ 5445 buf_size_idxs = 0; 5446 for (i=0;i<n_recvs;i++) { 5447 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5448 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5449 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5450 } 5451 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5452 5453 /* set preallocation */ 5454 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5455 if (!newisdense) { 5456 PetscInt *new_local_nnz=0; 5457 5458 ptr_idxs = recv_buffer_idxs_local; 5459 if (n_recvs) { 5460 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5461 } 5462 for (i=0;i<n_recvs;i++) { 5463 PetscInt j; 5464 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5465 for (j=0;j<*(ptr_idxs+1);j++) { 5466 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5467 } 5468 } else { 5469 /* TODO */ 5470 } 5471 ptr_idxs += olengths_idxs[i]; 5472 } 5473 if (new_local_nnz) { 5474 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5475 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5476 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5477 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5478 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5479 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5480 } else { 5481 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5482 } 5483 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5484 } else { 5485 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5486 } 5487 5488 /* set values */ 5489 ptr_vals = recv_buffer_vals; 5490 ptr_idxs = recv_buffer_idxs_local; 5491 for (i=0;i<n_recvs;i++) { 5492 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5493 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5494 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5495 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5496 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5497 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5498 } else { 5499 /* TODO */ 5500 } 5501 ptr_idxs += olengths_idxs[i]; 5502 ptr_vals += olengths_vals[i]; 5503 } 5504 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5505 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5507 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5509 5510 #if 0 5511 if (!restrict_comm) { /* check */ 5512 Vec lvec,rvec; 5513 PetscReal infty_error; 5514 5515 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5516 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5517 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5518 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5519 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5520 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5521 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5522 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5523 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5524 } 5525 #endif 5526 5527 /* assemble new additional is (if any) */ 5528 if (nis) { 5529 PetscInt **temp_idxs,*count_is,j,psum; 5530 5531 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5532 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5533 ptr_idxs = recv_buffer_idxs_is; 5534 psum = 0; 5535 for (i=0;i<n_recvs;i++) { 5536 for (j=0;j<nis;j++) { 5537 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5538 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5539 psum += plen; 5540 ptr_idxs += plen+1; /* shift pointer to received data */ 5541 } 5542 } 5543 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5544 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5545 for (i=1;i<nis;i++) { 5546 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5547 } 5548 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5549 ptr_idxs = recv_buffer_idxs_is; 5550 for (i=0;i<n_recvs;i++) { 5551 for (j=0;j<nis;j++) { 5552 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5553 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5554 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5555 ptr_idxs += plen+1; /* shift pointer to received data */ 5556 } 5557 } 5558 for (i=0;i<nis;i++) { 5559 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5560 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5561 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5562 } 5563 ierr = PetscFree(count_is);CHKERRQ(ierr); 5564 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5565 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5566 } 5567 /* free workspace */ 5568 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5569 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5570 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5571 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5572 if (isdense) { 5573 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5574 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5575 } else { 5576 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5577 } 5578 if (nis) { 5579 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5580 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5581 } 5582 5583 if (nvecs) { 5584 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5585 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5586 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5587 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5588 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5589 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5590 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5591 /* set values */ 5592 ptr_vals = recv_buffer_vecs; 5593 ptr_idxs = recv_buffer_idxs_local; 5594 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5595 for (i=0;i<n_recvs;i++) { 5596 PetscInt j; 5597 for (j=0;j<*(ptr_idxs+1);j++) { 5598 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5599 } 5600 ptr_idxs += olengths_idxs[i]; 5601 ptr_vals += olengths_idxs[i]-2; 5602 } 5603 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5604 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5605 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5606 } 5607 5608 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5609 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5610 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5611 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5612 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5613 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5614 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5615 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5616 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5617 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5618 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5619 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5620 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5621 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5622 ierr = PetscFree(onodes);CHKERRQ(ierr); 5623 if (nis) { 5624 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5625 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5626 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5627 } 5628 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5629 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5630 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5631 for (i=0;i<nis;i++) { 5632 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5633 } 5634 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5635 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5636 } 5637 *mat_n = NULL; 5638 } 5639 PetscFunctionReturn(0); 5640 } 5641 5642 /* temporary hack into ksp private data structure */ 5643 #include <petsc/private/kspimpl.h> 5644 5645 #undef __FUNCT__ 5646 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5647 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5648 { 5649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5650 PC_IS *pcis = (PC_IS*)pc->data; 5651 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5652 Mat coarsedivudotp = NULL; 5653 MatNullSpace CoarseNullSpace = NULL; 5654 ISLocalToGlobalMapping coarse_islg; 5655 IS coarse_is,*isarray; 5656 PetscInt i,im_active=-1,active_procs=-1; 5657 PetscInt nis,nisdofs,nisneu,nisvert; 5658 PC pc_temp; 5659 PCType coarse_pc_type; 5660 KSPType coarse_ksp_type; 5661 PetscBool multilevel_requested,multilevel_allowed; 5662 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5663 Mat t_coarse_mat_is; 5664 PetscInt ncoarse; 5665 PetscBool compute_vecs = PETSC_FALSE; 5666 PetscScalar *array; 5667 MatReuse coarse_mat_reuse; 5668 PetscBool restr, full_restr, have_void; 5669 PetscErrorCode ierr; 5670 5671 PetscFunctionBegin; 5672 /* Assign global numbering to coarse dofs */ 5673 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5674 PetscInt ocoarse_size; 5675 compute_vecs = PETSC_TRUE; 5676 ocoarse_size = pcbddc->coarse_size; 5677 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5678 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5679 /* see if we can avoid some work */ 5680 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5681 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5682 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5683 PC pc; 5684 PetscBool isbddc; 5685 5686 /* temporary workaround since PCBDDC does not have a reset method so far */ 5687 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5688 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5689 if (isbddc) { 5690 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5691 } else { 5692 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5693 } 5694 coarse_reuse = PETSC_FALSE; 5695 } else { /* we can safely reuse already computed coarse matrix */ 5696 coarse_reuse = PETSC_TRUE; 5697 } 5698 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5699 coarse_reuse = PETSC_FALSE; 5700 } 5701 /* reset any subassembling information */ 5702 if (!coarse_reuse || pcbddc->recompute_topography) { 5703 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5704 } 5705 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5706 coarse_reuse = PETSC_TRUE; 5707 } 5708 /* assemble coarse matrix */ 5709 if (coarse_reuse && pcbddc->coarse_ksp) { 5710 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5711 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5712 coarse_mat_reuse = MAT_REUSE_MATRIX; 5713 } else { 5714 coarse_mat = NULL; 5715 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5716 } 5717 5718 /* creates temporary l2gmap and IS for coarse indexes */ 5719 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5720 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5721 5722 /* creates temporary MATIS object for coarse matrix */ 5723 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5724 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5725 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5726 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5727 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5728 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5729 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5731 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5732 5733 /* count "active" (i.e. with positive local size) and "void" processes */ 5734 im_active = !!(pcis->n); 5735 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5736 5737 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5738 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5739 /* full_restr : just use the receivers from the subassembling pattern */ 5740 coarse_mat_is = NULL; 5741 multilevel_allowed = PETSC_FALSE; 5742 multilevel_requested = PETSC_FALSE; 5743 full_restr = PETSC_TRUE; 5744 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5745 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5746 if (multilevel_requested) { 5747 ncoarse = active_procs/pcbddc->coarsening_ratio; 5748 restr = PETSC_FALSE; 5749 full_restr = PETSC_FALSE; 5750 } else { 5751 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5752 restr = PETSC_TRUE; 5753 full_restr = PETSC_TRUE; 5754 } 5755 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5756 ncoarse = PetscMax(1,ncoarse); 5757 if (!pcbddc->coarse_subassembling) { 5758 if (pcbddc->coarsening_ratio > 1) { 5759 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5760 } else { 5761 PetscMPIInt size,rank; 5762 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5763 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5764 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5765 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5766 } 5767 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5768 PetscInt psum; 5769 PetscMPIInt size; 5770 if (pcbddc->coarse_ksp) psum = 1; 5771 else psum = 0; 5772 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5773 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5774 if (ncoarse < size) have_void = PETSC_TRUE; 5775 } 5776 /* determine if we can go multilevel */ 5777 if (multilevel_requested) { 5778 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5779 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5780 } 5781 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5782 5783 /* dump subassembling pattern */ 5784 if (pcbddc->dbg_flag && multilevel_allowed) { 5785 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5786 } 5787 5788 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5789 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5790 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5791 const PetscInt *idxs; 5792 ISLocalToGlobalMapping tmap; 5793 5794 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5795 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5796 /* allocate space for temporary storage */ 5797 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5798 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5799 /* allocate for IS array */ 5800 nisdofs = pcbddc->n_ISForDofsLocal; 5801 nisneu = !!pcbddc->NeumannBoundariesLocal; 5802 nisvert = 0; /* nisvert is not used */ 5803 nis = nisdofs + nisneu + nisvert; 5804 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5805 /* dofs splitting */ 5806 for (i=0;i<nisdofs;i++) { 5807 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5808 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5809 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5810 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5811 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5812 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5813 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5814 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5815 } 5816 /* neumann boundaries */ 5817 if (pcbddc->NeumannBoundariesLocal) { 5818 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5819 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5820 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5821 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5822 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5823 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5824 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5825 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5826 } 5827 /* free memory */ 5828 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5829 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5830 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5831 } else { 5832 nis = 0; 5833 nisdofs = 0; 5834 nisneu = 0; 5835 nisvert = 0; 5836 isarray = NULL; 5837 } 5838 /* destroy no longer needed map */ 5839 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5840 5841 /* subassemble */ 5842 if (multilevel_allowed) { 5843 Vec vp[1]; 5844 PetscInt nvecs = 0; 5845 PetscBool reuse,reuser; 5846 5847 if (coarse_mat) reuse = PETSC_TRUE; 5848 else reuse = PETSC_FALSE; 5849 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5850 vp[0] = NULL; 5851 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5852 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5853 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5854 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5855 nvecs = 1; 5856 5857 if (pcbddc->divudotp) { 5858 Mat B,loc_divudotp; 5859 Vec v,p; 5860 IS dummy; 5861 PetscInt np; 5862 5863 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5864 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5865 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5866 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5867 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5868 ierr = VecSet(p,1.);CHKERRQ(ierr); 5869 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5870 ierr = VecDestroy(&p);CHKERRQ(ierr); 5871 ierr = MatDestroy(&B);CHKERRQ(ierr); 5872 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5873 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5874 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5875 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5876 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5877 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5878 ierr = VecDestroy(&v);CHKERRQ(ierr); 5879 } 5880 } 5881 if (reuser) { 5882 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5883 } else { 5884 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5885 } 5886 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5887 PetscScalar *arraym,*arrayv; 5888 PetscInt nl; 5889 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5890 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5891 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5892 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5893 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5894 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5895 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5896 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5897 } else { 5898 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5899 } 5900 } else { 5901 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5902 } 5903 if (coarse_mat_is || coarse_mat) { 5904 PetscMPIInt size; 5905 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5906 if (!multilevel_allowed) { 5907 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5908 } else { 5909 Mat A; 5910 5911 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5912 if (coarse_mat_is) { 5913 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5914 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5915 coarse_mat = coarse_mat_is; 5916 } 5917 /* be sure we don't have MatSeqDENSE as local mat */ 5918 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5919 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5920 } 5921 } 5922 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5923 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5924 5925 /* create local to global scatters for coarse problem */ 5926 if (compute_vecs) { 5927 PetscInt lrows; 5928 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5929 if (coarse_mat) { 5930 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5931 } else { 5932 lrows = 0; 5933 } 5934 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5935 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5936 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5937 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5938 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5939 } 5940 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5941 5942 /* set defaults for coarse KSP and PC */ 5943 if (multilevel_allowed) { 5944 coarse_ksp_type = KSPRICHARDSON; 5945 coarse_pc_type = PCBDDC; 5946 } else { 5947 coarse_ksp_type = KSPPREONLY; 5948 coarse_pc_type = PCREDUNDANT; 5949 } 5950 5951 /* print some info if requested */ 5952 if (pcbddc->dbg_flag) { 5953 if (!multilevel_allowed) { 5954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5955 if (multilevel_requested) { 5956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5957 } else if (pcbddc->max_levels) { 5958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5959 } 5960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5961 } 5962 } 5963 5964 /* create the coarse KSP object only once with defaults */ 5965 if (coarse_mat) { 5966 PetscViewer dbg_viewer = NULL; 5967 if (pcbddc->dbg_flag) { 5968 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5969 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5970 } 5971 if (!pcbddc->coarse_ksp) { 5972 char prefix[256],str_level[16]; 5973 size_t len; 5974 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5975 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5976 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5977 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5978 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5979 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5980 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5981 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5982 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5983 /* prefix */ 5984 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5985 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5986 if (!pcbddc->current_level) { 5987 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5988 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5989 } else { 5990 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5991 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5992 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5993 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5994 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5995 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5996 } 5997 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5998 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5999 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6000 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6001 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6002 /* allow user customization */ 6003 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6004 } 6005 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6006 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6007 if (nisdofs) { 6008 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6009 for (i=0;i<nisdofs;i++) { 6010 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6011 } 6012 } 6013 if (nisneu) { 6014 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6015 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6016 } 6017 if (nisvert) { 6018 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6019 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6020 } 6021 6022 /* get some info after set from options */ 6023 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6024 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6025 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6026 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6027 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6028 isbddc = PETSC_FALSE; 6029 } 6030 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6031 if (isredundant) { 6032 KSP inner_ksp; 6033 PC inner_pc; 6034 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6035 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6036 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6037 } 6038 6039 /* parameters which miss an API */ 6040 if (isbddc) { 6041 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6042 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6043 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6044 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6045 if (pcbddc_coarse->benign_saddle_point) { 6046 Mat coarsedivudotp_is; 6047 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6048 IS row,col; 6049 const PetscInt *gidxs; 6050 PetscInt n,st,M,N; 6051 6052 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6053 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6054 st = st-n; 6055 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6056 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6057 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6058 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6059 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6060 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6061 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6062 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6063 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6064 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6065 ierr = ISDestroy(&row);CHKERRQ(ierr); 6066 ierr = ISDestroy(&col);CHKERRQ(ierr); 6067 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6068 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6069 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6070 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6071 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6072 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6073 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6074 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6075 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,NULL);CHKERRQ(ierr); 6076 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6077 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6078 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6079 } 6080 } 6081 6082 /* propagate symmetry info of coarse matrix */ 6083 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6084 if (pc->pmat->symmetric_set) { 6085 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6086 } 6087 if (pc->pmat->hermitian_set) { 6088 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6089 } 6090 if (pc->pmat->spd_set) { 6091 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6092 } 6093 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6094 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6095 } 6096 /* set operators */ 6097 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6098 if (pcbddc->dbg_flag) { 6099 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6100 } 6101 } 6102 ierr = PetscFree(isarray);CHKERRQ(ierr); 6103 #if 0 6104 { 6105 PetscViewer viewer; 6106 char filename[256]; 6107 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6108 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6109 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6110 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6111 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6112 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6113 } 6114 #endif 6115 6116 if (pcbddc->coarse_ksp) { 6117 Vec crhs,csol; 6118 6119 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6120 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6121 if (!csol) { 6122 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6123 } 6124 if (!crhs) { 6125 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6126 } 6127 } 6128 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6129 6130 /* compute null space for coarse solver if the benign trick has been requested */ 6131 if (pcbddc->benign_null) { 6132 6133 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6134 for (i=0;i<pcbddc->benign_n;i++) { 6135 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6136 } 6137 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6138 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6139 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6140 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6141 if (coarse_mat) { 6142 Vec nullv; 6143 PetscScalar *array,*array2; 6144 PetscInt nl; 6145 6146 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6147 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6148 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6149 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6150 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6151 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6152 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6153 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6154 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6155 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6156 } 6157 } 6158 6159 if (pcbddc->coarse_ksp) { 6160 PetscBool ispreonly; 6161 6162 if (CoarseNullSpace) { 6163 PetscBool isnull; 6164 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6165 if (isnull) { 6166 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6167 } 6168 /* TODO: add local nullspaces (if any) */ 6169 } 6170 /* setup coarse ksp */ 6171 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6172 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6173 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6174 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6175 KSP check_ksp; 6176 KSPType check_ksp_type; 6177 PC check_pc; 6178 Vec check_vec,coarse_vec; 6179 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6180 PetscInt its; 6181 PetscBool compute_eigs; 6182 PetscReal *eigs_r,*eigs_c; 6183 PetscInt neigs; 6184 const char *prefix; 6185 6186 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6187 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6188 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6189 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6190 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6191 /* prevent from setup unneeded object */ 6192 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6193 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6194 if (ispreonly) { 6195 check_ksp_type = KSPPREONLY; 6196 compute_eigs = PETSC_FALSE; 6197 } else { 6198 check_ksp_type = KSPGMRES; 6199 compute_eigs = PETSC_TRUE; 6200 } 6201 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6202 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6203 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6204 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6205 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6206 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6207 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6208 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6209 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6210 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6211 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6212 /* create random vec */ 6213 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6214 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6215 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6216 /* solve coarse problem */ 6217 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6218 /* set eigenvalue estimation if preonly has not been requested */ 6219 if (compute_eigs) { 6220 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6221 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6222 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6223 if (neigs) { 6224 lambda_max = eigs_r[neigs-1]; 6225 lambda_min = eigs_r[0]; 6226 if (pcbddc->use_coarse_estimates) { 6227 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6228 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6229 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6230 } 6231 } 6232 } 6233 } 6234 6235 /* check coarse problem residual error */ 6236 if (pcbddc->dbg_flag) { 6237 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6238 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6239 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6240 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6241 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6242 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6243 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6244 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6245 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6246 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6247 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6248 if (CoarseNullSpace) { 6249 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6250 } 6251 if (compute_eigs) { 6252 PetscReal lambda_max_s,lambda_min_s; 6253 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6254 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6255 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6256 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6257 for (i=0;i<neigs;i++) { 6258 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6259 } 6260 } 6261 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6262 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6263 } 6264 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6265 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6266 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6267 if (compute_eigs) { 6268 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6269 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6270 } 6271 } 6272 } 6273 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6274 /* print additional info */ 6275 if (pcbddc->dbg_flag) { 6276 /* waits until all processes reaches this point */ 6277 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6280 } 6281 6282 /* free memory */ 6283 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6284 PetscFunctionReturn(0); 6285 } 6286 6287 #undef __FUNCT__ 6288 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6289 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6290 { 6291 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6292 PC_IS* pcis = (PC_IS*)pc->data; 6293 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6294 IS subset,subset_mult,subset_n; 6295 PetscInt local_size,coarse_size=0; 6296 PetscInt *local_primal_indices=NULL; 6297 const PetscInt *t_local_primal_indices; 6298 PetscErrorCode ierr; 6299 6300 PetscFunctionBegin; 6301 /* Compute global number of coarse dofs */ 6302 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6303 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6304 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6305 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6307 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6308 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6309 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6310 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6311 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6312 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6313 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6314 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6315 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6316 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6317 6318 /* check numbering */ 6319 if (pcbddc->dbg_flag) { 6320 PetscScalar coarsesum,*array,*array2; 6321 PetscInt i; 6322 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6323 6324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6326 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6327 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6328 /* counter */ 6329 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6330 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6331 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6332 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6333 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6334 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6335 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6336 for (i=0;i<pcbddc->local_primal_size;i++) { 6337 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6338 } 6339 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6340 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6341 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6342 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6343 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6344 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6345 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6346 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6347 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6348 for (i=0;i<pcis->n;i++) { 6349 if (array[i] != 0.0 && array[i] != array2[i]) { 6350 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6351 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6352 set_error = PETSC_TRUE; 6353 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6355 } 6356 } 6357 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6358 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6359 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6360 for (i=0;i<pcis->n;i++) { 6361 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6362 } 6363 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6364 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6365 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6366 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6367 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6368 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6369 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6370 PetscInt *gidxs; 6371 6372 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6373 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6374 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6375 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6377 for (i=0;i<pcbddc->local_primal_size;i++) { 6378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6379 } 6380 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6381 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6382 } 6383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6384 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6385 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6386 } 6387 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6388 /* get back data */ 6389 *coarse_size_n = coarse_size; 6390 *local_primal_indices_n = local_primal_indices; 6391 PetscFunctionReturn(0); 6392 } 6393 6394 #undef __FUNCT__ 6395 #define __FUNCT__ "PCBDDCGlobalToLocal" 6396 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6397 { 6398 IS localis_t; 6399 PetscInt i,lsize,*idxs,n; 6400 PetscScalar *vals; 6401 PetscErrorCode ierr; 6402 6403 PetscFunctionBegin; 6404 /* get indices in local ordering exploiting local to global map */ 6405 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6406 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6407 for (i=0;i<lsize;i++) vals[i] = 1.0; 6408 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6409 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6410 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6411 if (idxs) { /* multilevel guard */ 6412 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6413 } 6414 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6415 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6416 ierr = PetscFree(vals);CHKERRQ(ierr); 6417 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6418 /* now compute set in local ordering */ 6419 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6420 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6421 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6422 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6423 for (i=0,lsize=0;i<n;i++) { 6424 if (PetscRealPart(vals[i]) > 0.5) { 6425 lsize++; 6426 } 6427 } 6428 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6429 for (i=0,lsize=0;i<n;i++) { 6430 if (PetscRealPart(vals[i]) > 0.5) { 6431 idxs[lsize++] = i; 6432 } 6433 } 6434 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6435 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6436 *localis = localis_t; 6437 PetscFunctionReturn(0); 6438 } 6439 6440 #undef __FUNCT__ 6441 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6442 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6443 { 6444 PC_IS *pcis=(PC_IS*)pc->data; 6445 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6446 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6447 Mat S_j; 6448 PetscInt *used_xadj,*used_adjncy; 6449 PetscBool free_used_adj; 6450 PetscErrorCode ierr; 6451 6452 PetscFunctionBegin; 6453 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6454 free_used_adj = PETSC_FALSE; 6455 if (pcbddc->sub_schurs_layers == -1) { 6456 used_xadj = NULL; 6457 used_adjncy = NULL; 6458 } else { 6459 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6460 used_xadj = pcbddc->mat_graph->xadj; 6461 used_adjncy = pcbddc->mat_graph->adjncy; 6462 } else if (pcbddc->computed_rowadj) { 6463 used_xadj = pcbddc->mat_graph->xadj; 6464 used_adjncy = pcbddc->mat_graph->adjncy; 6465 } else { 6466 PetscBool flg_row=PETSC_FALSE; 6467 const PetscInt *xadj,*adjncy; 6468 PetscInt nvtxs; 6469 6470 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6471 if (flg_row) { 6472 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6473 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6474 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6475 free_used_adj = PETSC_TRUE; 6476 } else { 6477 pcbddc->sub_schurs_layers = -1; 6478 used_xadj = NULL; 6479 used_adjncy = NULL; 6480 } 6481 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6482 } 6483 } 6484 6485 /* setup sub_schurs data */ 6486 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6487 if (!sub_schurs->schur_explicit) { 6488 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6489 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6490 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6491 } else { 6492 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6493 PetscBool isseqaij,need_change = PETSC_FALSE;; 6494 PetscInt benign_n; 6495 Mat change = NULL; 6496 Vec scaling = NULL; 6497 IS change_primal = NULL; 6498 6499 if (!pcbddc->use_vertices && reuse_solvers) { 6500 PetscInt n_vertices; 6501 6502 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6503 reuse_solvers = (PetscBool)!n_vertices; 6504 } 6505 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6506 if (!isseqaij) { 6507 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6508 if (matis->A == pcbddc->local_mat) { 6509 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6510 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6511 } else { 6512 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6513 } 6514 } 6515 if (!pcbddc->benign_change_explicit) { 6516 benign_n = pcbddc->benign_n; 6517 } else { 6518 benign_n = 0; 6519 } 6520 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6521 We need a global reduction to avoid possible deadlocks. 6522 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6523 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6524 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6525 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6526 need_change = (PetscBool)(!need_change); 6527 } 6528 /* If the user defines additional constraints, we import them here. 6529 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6530 if (need_change) { 6531 PC_IS *pcisf; 6532 PC_BDDC *pcbddcf; 6533 PC pcf; 6534 6535 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6536 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6537 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6538 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6539 /* hacks */ 6540 pcisf = (PC_IS*)pcf->data; 6541 pcisf->is_B_local = pcis->is_B_local; 6542 pcisf->vec1_N = pcis->vec1_N; 6543 pcisf->BtoNmap = pcis->BtoNmap; 6544 pcisf->n = pcis->n; 6545 pcisf->n_B = pcis->n_B; 6546 pcbddcf = (PC_BDDC*)pcf->data; 6547 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6548 pcbddcf->mat_graph = pcbddc->mat_graph; 6549 pcbddcf->use_faces = PETSC_TRUE; 6550 pcbddcf->use_change_of_basis = PETSC_TRUE; 6551 pcbddcf->use_change_on_faces = PETSC_TRUE; 6552 pcbddcf->use_qr_single = PETSC_TRUE; 6553 pcbddcf->fake_change = PETSC_TRUE; 6554 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6555 /* store information on primal vertices and change of basis (in local numbering) */ 6556 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6557 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6558 change = pcbddcf->ConstraintMatrix; 6559 pcbddcf->ConstraintMatrix = NULL; 6560 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6561 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6562 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6563 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6564 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6565 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6566 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6567 pcf->ops->destroy = NULL; 6568 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6569 } 6570 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6571 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6572 ierr = MatDestroy(&change);CHKERRQ(ierr); 6573 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6574 } 6575 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6576 6577 /* free adjacency */ 6578 if (free_used_adj) { 6579 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6580 } 6581 PetscFunctionReturn(0); 6582 } 6583 6584 #undef __FUNCT__ 6585 #define __FUNCT__ "PCBDDCInitSubSchurs" 6586 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6587 { 6588 PC_IS *pcis=(PC_IS*)pc->data; 6589 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6590 PCBDDCGraph graph; 6591 PetscErrorCode ierr; 6592 6593 PetscFunctionBegin; 6594 /* attach interface graph for determining subsets */ 6595 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6596 IS verticesIS,verticescomm; 6597 PetscInt vsize,*idxs; 6598 6599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6600 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6601 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6602 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6603 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6604 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6605 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6606 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6607 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6608 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6609 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6610 } else { 6611 graph = pcbddc->mat_graph; 6612 } 6613 /* print some info */ 6614 if (pcbddc->dbg_flag) { 6615 IS vertices; 6616 PetscInt nv,nedges,nfaces; 6617 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6618 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6619 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6620 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6621 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6626 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6627 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6628 } 6629 6630 /* sub_schurs init */ 6631 if (!pcbddc->sub_schurs) { 6632 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6633 } 6634 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6635 6636 /* free graph struct */ 6637 if (pcbddc->sub_schurs_rebuild) { 6638 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6639 } 6640 PetscFunctionReturn(0); 6641 } 6642 6643 #undef __FUNCT__ 6644 #define __FUNCT__ "PCBDDCCheckOperator" 6645 PetscErrorCode PCBDDCCheckOperator(PC pc) 6646 { 6647 PC_IS *pcis=(PC_IS*)pc->data; 6648 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6649 PetscErrorCode ierr; 6650 6651 PetscFunctionBegin; 6652 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6653 IS zerodiag = NULL; 6654 Mat S_j,B0_B=NULL; 6655 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6656 PetscScalar *p0_check,*array,*array2; 6657 PetscReal norm; 6658 PetscInt i; 6659 6660 /* B0 and B0_B */ 6661 if (zerodiag) { 6662 IS dummy; 6663 6664 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6665 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6666 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6667 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6668 } 6669 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6670 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6671 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6672 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6674 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6675 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6676 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6677 /* S_j */ 6678 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6679 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6680 6681 /* mimic vector in \widetilde{W}_\Gamma */ 6682 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6683 /* continuous in primal space */ 6684 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6685 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6686 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6687 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6688 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6689 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6690 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6691 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6692 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6693 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6694 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6695 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6696 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6697 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6698 6699 /* assemble rhs for coarse problem */ 6700 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6701 /* local with Schur */ 6702 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6703 if (zerodiag) { 6704 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6705 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6706 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6707 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6708 } 6709 /* sum on primal nodes the local contributions */ 6710 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6711 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6712 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6713 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6714 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6715 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6716 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6717 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6718 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6719 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6720 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6721 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6722 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6723 /* scale primal nodes (BDDC sums contibutions) */ 6724 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6725 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6726 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6727 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6728 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6729 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6730 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6731 /* global: \widetilde{B0}_B w_\Gamma */ 6732 if (zerodiag) { 6733 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6734 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6735 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6736 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6737 } 6738 /* BDDC */ 6739 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6740 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6741 6742 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6743 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6744 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6745 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6746 for (i=0;i<pcbddc->benign_n;i++) { 6747 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6748 } 6749 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6750 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6751 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6752 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6753 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6754 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6755 } 6756 PetscFunctionReturn(0); 6757 } 6758