1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 Mat loc_divudotp; 13 Vec p,v,quad_vec,*quad_vecs; 14 ISLocalToGlobalMapping rmap; 15 IS *faces,*edges; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 PetscMPIInt rank; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 24 if (graph->twodim) { 25 lmaxneighs = 2; 26 } else { 27 lmaxneighs = 1; 28 for (i=0;i<ne;i++) { 29 const PetscInt *idxs; 30 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 31 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 32 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 33 } 34 lmaxneighs++; /* graph count does not include self */ 35 } 36 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 37 maxsize = 0; 38 for (i=0;i<ne;i++) { 39 PetscInt nn; 40 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 41 maxsize = PetscMax(maxsize,nn); 42 } 43 for (i=0;i<nf;i++) { 44 PetscInt nn; 45 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 46 maxsize = PetscMax(maxsize,nn); 47 } 48 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 49 /* create vectors to hold quadrature weights */ 50 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 51 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 52 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 53 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 54 for (i=0;i<maxneighs;i++) { 55 PetscInt first,last; 56 57 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 58 /* the near-null space fo BDDC carries information on quadrature weights, 59 and these can be collinear -> so cheat with MatNullSpaceCreate 60 and create a suitable set of basis vectors first */ 61 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 62 if (i>=first && i < last) { 63 PetscScalar *data; 64 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 65 data[i-first] = 1.; 66 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 67 } 68 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 69 } 70 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 71 for (i=0;i<maxneighs;i++) { /* reset vectors */ 72 PetscInt first,last; 73 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 74 if (i>=first && i < last) { 75 PetscScalar *data; 76 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 77 data[i-first] = 0.; 78 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 79 } 80 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 81 } 82 /* compute local quad vec */ 83 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 84 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 85 ierr = VecSet(p,1.);CHKERRQ(ierr); 86 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 87 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 88 ierr = VecDestroy(&p);CHKERRQ(ierr); 89 /* insert in global quadrature vecs */ 90 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 91 for (i=0;i<nf;i++) { 92 const PetscInt *idxs; 93 PetscInt idx,nn,j; 94 95 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 97 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 98 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 99 idx = -(idx+1); 100 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 101 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 102 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 103 } 104 for (i=0;i<ne;i++) { 105 const PetscInt *idxs; 106 PetscInt idx,nn,j; 107 108 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 109 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 110 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 111 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 112 idx = -(idx+1); 113 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 114 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 115 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 116 } 117 ierr = PetscFree(edges);CHKERRQ(ierr); 118 ierr = PetscFree(faces);CHKERRQ(ierr); 119 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 120 ierr = VecDestroy(&v);CHKERRQ(ierr); 121 ierr = PetscFree(vals);CHKERRQ(ierr); 122 123 /* assemble near null space */ 124 for (i=0;i<maxneighs;i++) { 125 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 126 } 127 for (i=0;i<maxneighs;i++) { 128 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 129 } 130 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 137 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 138 { 139 PetscErrorCode ierr; 140 Vec local,global; 141 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 142 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 143 144 PetscFunctionBegin; 145 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 146 /* need to convert from global to local topology information and remove references to information in global ordering */ 147 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 148 if (pcbddc->user_provided_isfordofs) { 149 if (pcbddc->n_ISForDofs) { 150 PetscInt i; 151 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 152 for (i=0;i<pcbddc->n_ISForDofs;i++) { 153 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 154 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 155 } 156 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 157 pcbddc->n_ISForDofs = 0; 158 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 159 } 160 } else { 161 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 162 PetscInt i, n = matis->A->rmap->n; 163 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 164 if (i > 1) { 165 pcbddc->n_ISForDofsLocal = i; 166 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 167 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 168 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 169 } 170 } 171 } 172 } 173 174 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 175 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 176 } 177 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 178 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 179 } 180 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 181 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 182 } 183 ierr = VecDestroy(&global);CHKERRQ(ierr); 184 ierr = VecDestroy(&local);CHKERRQ(ierr); 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 190 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 191 { 192 PC_IS *pcis = (PC_IS*)(pc->data); 193 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 194 PetscErrorCode ierr; 195 196 PetscFunctionBegin; 197 if (!pcbddc->benign_have_null) { 198 PetscFunctionReturn(0); 199 } 200 if (pcbddc->ChangeOfBasisMatrix) { 201 Vec swap; 202 203 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 204 swap = pcbddc->work_change; 205 pcbddc->work_change = r; 206 r = swap; 207 } 208 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 209 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 210 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 211 ierr = VecSet(z,0.);CHKERRQ(ierr); 212 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 213 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 214 if (pcbddc->ChangeOfBasisMatrix) { 215 Vec swap; 216 217 swap = r; 218 r = pcbddc->work_change; 219 pcbddc->work_change = swap; 220 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 221 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 222 } 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 228 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 229 { 230 PCBDDCBenignMatMult_ctx ctx; 231 PetscErrorCode ierr; 232 PetscBool apply_right,apply_left,reset_x; 233 234 PetscFunctionBegin; 235 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 236 if (transpose) { 237 apply_right = ctx->apply_left; 238 apply_left = ctx->apply_right; 239 } else { 240 apply_right = ctx->apply_right; 241 apply_left = ctx->apply_left; 242 } 243 reset_x = PETSC_FALSE; 244 if (apply_right) { 245 const PetscScalar *ax; 246 PetscInt nl,i; 247 248 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 249 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 250 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 252 for (i=0;i<ctx->benign_n;i++) { 253 PetscScalar sum,val; 254 const PetscInt *idxs; 255 PetscInt nz,j; 256 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 257 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 258 sum = 0.; 259 if (ctx->apply_p0) { 260 val = ctx->work[idxs[nz-1]]; 261 for (j=0;j<nz-1;j++) { 262 sum += ctx->work[idxs[j]]; 263 ctx->work[idxs[j]] += val; 264 } 265 } else { 266 for (j=0;j<nz-1;j++) { 267 sum += ctx->work[idxs[j]]; 268 } 269 } 270 ctx->work[idxs[nz-1]] -= sum; 271 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 272 } 273 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 274 reset_x = PETSC_TRUE; 275 } 276 if (transpose) { 277 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 278 } else { 279 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 280 } 281 if (reset_x) { 282 ierr = VecResetArray(x);CHKERRQ(ierr); 283 } 284 if (apply_left) { 285 PetscScalar *ay; 286 PetscInt i; 287 288 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 289 for (i=0;i<ctx->benign_n;i++) { 290 PetscScalar sum,val; 291 const PetscInt *idxs; 292 PetscInt nz,j; 293 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 294 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 295 val = -ay[idxs[nz-1]]; 296 if (ctx->apply_p0) { 297 sum = 0.; 298 for (j=0;j<nz-1;j++) { 299 sum += ay[idxs[j]]; 300 ay[idxs[j]] += val; 301 } 302 ay[idxs[nz-1]] += sum; 303 } else { 304 for (j=0;j<nz-1;j++) { 305 ay[idxs[j]] += val; 306 } 307 ay[idxs[nz-1]] = 0.; 308 } 309 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 310 } 311 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 312 } 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 318 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 319 { 320 PetscErrorCode ierr; 321 322 PetscFunctionBegin; 323 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 329 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 330 { 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "PCBDDCBenignShellMat" 340 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 341 { 342 PC_IS *pcis = (PC_IS*)pc->data; 343 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 344 PCBDDCBenignMatMult_ctx ctx; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 if (!restore) { 349 Mat A_IB,A_BI; 350 PetscScalar *work; 351 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 352 353 if (pcbddc->benign_original_mat) { 354 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 355 } 356 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 357 PetscFunctionReturn(0); 358 } 359 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 360 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 361 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 362 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 363 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 364 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 365 ierr = PetscNew(&ctx);CHKERRQ(ierr); 366 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 367 ctx->apply_left = PETSC_TRUE; 368 ctx->apply_right = PETSC_FALSE; 369 ctx->apply_p0 = PETSC_FALSE; 370 ctx->benign_n = pcbddc->benign_n; 371 if (reuse) { 372 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 373 ctx->free = PETSC_FALSE; 374 } else { /* TODO: could be optimized for successive solves */ 375 ISLocalToGlobalMapping N_to_D; 376 PetscInt i; 377 378 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 379 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 380 for (i=0;i<pcbddc->benign_n;i++) { 381 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 382 } 383 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 384 ctx->free = PETSC_TRUE; 385 } 386 ctx->A = pcis->A_IB; 387 ctx->work = work; 388 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 389 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 391 pcis->A_IB = A_IB; 392 393 /* A_BI as A_IB^T */ 394 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 395 pcbddc->benign_original_mat = pcis->A_BI; 396 pcis->A_BI = A_BI; 397 } else { 398 if (!pcbddc->benign_original_mat) { 399 PetscFunctionReturn(0); 400 } 401 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 403 pcis->A_IB = ctx->A; 404 ctx->A = NULL; 405 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 406 pcis->A_BI = pcbddc->benign_original_mat; 407 pcbddc->benign_original_mat = NULL; 408 if (ctx->free) { 409 PetscInt i; 410 for (i=0;i<ctx->benign_n;i++) { 411 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 412 } 413 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 414 } 415 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 416 ierr = PetscFree(ctx);CHKERRQ(ierr); 417 } 418 PetscFunctionReturn(0); 419 } 420 421 /* used just in bddc debug mode */ 422 #undef __FUNCT__ 423 #define __FUNCT__ "PCBDDCBenignProject" 424 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 425 { 426 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 427 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 428 Mat An; 429 PetscErrorCode ierr; 430 431 PetscFunctionBegin; 432 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 433 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 434 if (is1) { 435 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 436 ierr = MatDestroy(&An);CHKERRQ(ierr); 437 } else { 438 *B = An; 439 } 440 PetscFunctionReturn(0); 441 } 442 443 /* TODO: add reuse flag */ 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatSeqAIJCompress" 446 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 447 { 448 Mat Bt; 449 PetscScalar *a,*bdata; 450 const PetscInt *ii,*ij; 451 PetscInt m,n,i,nnz,*bii,*bij; 452 PetscBool flg_row; 453 PetscErrorCode ierr; 454 455 PetscFunctionBegin; 456 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 457 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 458 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 459 nnz = n; 460 for (i=0;i<ii[n];i++) { 461 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 462 } 463 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 464 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 465 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 466 nnz = 0; 467 bii[0] = 0; 468 for (i=0;i<n;i++) { 469 PetscInt j; 470 for (j=ii[i];j<ii[i+1];j++) { 471 PetscScalar entry = a[j]; 472 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 473 bij[nnz] = ij[j]; 474 bdata[nnz] = entry; 475 nnz++; 476 } 477 } 478 bii[i+1] = nnz; 479 } 480 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 481 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 482 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 483 { 484 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 485 b->free_a = PETSC_TRUE; 486 b->free_ij = PETSC_TRUE; 487 } 488 *B = Bt; 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatDetectDisconnectedComponents" 494 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 495 { 496 Mat B; 497 IS is_dummy,*cc_n; 498 ISLocalToGlobalMapping l2gmap_dummy; 499 PCBDDCGraph graph; 500 PetscInt i,n; 501 PetscInt *xadj,*adjncy; 502 PetscInt *xadj_filtered,*adjncy_filtered; 503 PetscBool flg_row,isseqaij; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 if (!A->rmap->N || !A->cmap->N) { 508 *ncc = 0; 509 *cc = NULL; 510 PetscFunctionReturn(0); 511 } 512 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 513 if (!isseqaij && filter) { 514 PetscBool isseqdense; 515 516 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 517 if (!isseqdense) { 518 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 519 } else { /* TODO: rectangular case and LDA */ 520 PetscScalar *array; 521 PetscReal chop=1.e-6; 522 523 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 524 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 525 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 526 for (i=0;i<n;i++) { 527 PetscInt j; 528 for (j=i+1;j<n;j++) { 529 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 530 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 531 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 532 } 533 } 534 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 535 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 536 } 537 } else { 538 B = A; 539 } 540 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 541 542 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 543 if (filter) { 544 PetscScalar *data; 545 PetscInt j,cum; 546 547 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 548 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 549 cum = 0; 550 for (i=0;i<n;i++) { 551 PetscInt t; 552 553 for (j=xadj[i];j<xadj[i+1];j++) { 554 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 555 continue; 556 } 557 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 558 } 559 t = xadj_filtered[i]; 560 xadj_filtered[i] = cum; 561 cum += t; 562 } 563 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 564 } else { 565 xadj_filtered = NULL; 566 adjncy_filtered = NULL; 567 } 568 569 /* compute local connected components using PCBDDCGraph */ 570 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 571 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 572 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 573 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 574 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 575 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 576 if (xadj_filtered) { 577 graph->xadj = xadj_filtered; 578 graph->adjncy = adjncy_filtered; 579 } else { 580 graph->xadj = xadj; 581 graph->adjncy = adjncy; 582 } 583 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 584 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 585 /* partial clean up */ 586 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 587 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 588 if (A != B) { 589 ierr = MatDestroy(&B);CHKERRQ(ierr); 590 } 591 592 /* get back data */ 593 if (ncc) *ncc = graph->ncc; 594 if (cc) { 595 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 596 for (i=0;i<graph->ncc;i++) { 597 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 598 } 599 *cc = cc_n; 600 } 601 /* clean up graph */ 602 graph->xadj = 0; 603 graph->adjncy = 0; 604 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "PCBDDCBenignCheck" 610 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 611 { 612 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 613 PC_IS* pcis = (PC_IS*)(pc->data); 614 IS dirIS = NULL; 615 PetscInt i; 616 PetscErrorCode ierr; 617 618 PetscFunctionBegin; 619 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 620 if (zerodiag) { 621 Mat A; 622 Vec vec3_N; 623 PetscScalar *vals; 624 const PetscInt *idxs; 625 PetscInt nz,*count; 626 627 /* p0 */ 628 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 629 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 630 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 631 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 632 for (i=0;i<nz;i++) vals[i] = 1.; 633 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 634 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 635 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 636 /* v_I */ 637 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 638 for (i=0;i<nz;i++) vals[i] = 0.; 639 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 641 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 642 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 643 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 644 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 645 if (dirIS) { 646 PetscInt n; 647 648 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 649 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 650 for (i=0;i<n;i++) vals[i] = 0.; 651 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 652 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 653 } 654 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 655 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 656 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 657 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 658 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 659 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 660 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 661 if (PetscAbsScalar(vals[0]) > 1.e-1) { 662 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 663 } 664 ierr = PetscFree(vals);CHKERRQ(ierr); 665 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 666 667 /* there should not be any pressure dofs lying on the interface */ 668 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 669 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 670 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 671 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 672 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 673 for (i=0;i<nz;i++) { 674 if (count[idxs[i]]) { 675 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 676 } 677 } 678 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 679 ierr = PetscFree(count);CHKERRQ(ierr); 680 } 681 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 682 683 /* check PCBDDCBenignGetOrSetP0 */ 684 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 685 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 686 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 687 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 688 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 689 for (i=0;i<pcbddc->benign_n;i++) { 690 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 691 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 692 } 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 699 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 700 { 701 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 702 IS pressures,zerodiag,*zerodiag_subs; 703 PetscInt nz,n; 704 PetscInt *interior_dofs,n_interior_dofs; 705 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 706 PetscErrorCode ierr; 707 708 PetscFunctionBegin; 709 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 711 for (n=0;n<pcbddc->benign_n;n++) { 712 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 713 } 714 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 715 pcbddc->benign_n = 0; 716 /* if a local info on dofs is present, assumes that the last field represents "pressures" 717 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 718 Checks if all the pressure dofs in each subdomain have a zero diagonal 719 If not, a change of basis on pressures is not needed 720 since the local Schur complements are already SPD 721 */ 722 has_null_pressures = PETSC_TRUE; 723 have_null = PETSC_TRUE; 724 if (pcbddc->n_ISForDofsLocal) { 725 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 726 727 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 728 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 729 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 731 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 732 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 733 if (!sorted) { 734 ierr = ISSort(pressures);CHKERRQ(ierr); 735 } 736 } else { 737 pressures = NULL; 738 } 739 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 740 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 741 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 742 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 743 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 744 if (!sorted) { 745 ierr = ISSort(zerodiag);CHKERRQ(ierr); 746 } 747 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 748 if (!nz) { 749 if (n) have_null = PETSC_FALSE; 750 has_null_pressures = PETSC_FALSE; 751 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 752 } 753 recompute_zerodiag = PETSC_FALSE; 754 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 755 zerodiag_subs = NULL; 756 pcbddc->benign_n = 0; 757 n_interior_dofs = 0; 758 interior_dofs = NULL; 759 if (pcbddc->current_level) { /* need to compute interior nodes */ 760 PetscInt n,i,j; 761 PetscInt n_neigh,*neigh,*n_shared,**shared; 762 PetscInt *iwork; 763 764 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 765 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 766 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 767 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 768 for (i=0;i<n_neigh;i++) 769 for (j=0;j<n_shared[i];j++) 770 iwork[shared[i][j]] += 1; 771 for (i=0;i<n;i++) 772 if (!iwork[i]) 773 interior_dofs[n_interior_dofs++] = i; 774 ierr = PetscFree(iwork);CHKERRQ(ierr); 775 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 776 } 777 if (has_null_pressures) { 778 IS *subs; 779 PetscInt nsubs,i,j,nl; 780 const PetscInt *idxs; 781 PetscScalar *array; 782 Vec *work; 783 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 784 785 subs = pcbddc->local_subs; 786 nsubs = pcbddc->n_local_subs; 787 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 788 if (pcbddc->current_level) { 789 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 790 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 791 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 792 /* work[0] = 1_p */ 793 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 794 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 795 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 796 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 797 /* work[0] = 1_v */ 798 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 799 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 800 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 801 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 803 } 804 if (nsubs > 1) { 805 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 806 for (i=0;i<nsubs;i++) { 807 ISLocalToGlobalMapping l2g; 808 IS t_zerodiag_subs; 809 PetscInt nl; 810 811 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 812 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 813 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 814 if (nl) { 815 PetscBool valid = PETSC_TRUE; 816 817 if (pcbddc->current_level) { 818 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 819 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 820 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 821 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 822 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 823 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 824 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 825 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 826 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 827 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 828 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 829 for (j=0;j<n_interior_dofs;j++) { 830 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 831 valid = PETSC_FALSE; 832 break; 833 } 834 } 835 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 836 } 837 if (valid && pcbddc->NeumannBoundariesLocal) { 838 IS t_bc; 839 PetscInt nzb; 840 841 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 842 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 843 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 844 if (nzb) valid = PETSC_FALSE; 845 } 846 if (valid && pressures) { 847 IS t_pressure_subs; 848 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 849 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 850 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 851 } 852 if (valid) { 853 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 854 pcbddc->benign_n++; 855 } else { 856 recompute_zerodiag = PETSC_TRUE; 857 } 858 } 859 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 860 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 861 } 862 } else { /* there's just one subdomain (or zero if they have not been detected */ 863 PetscBool valid = PETSC_TRUE; 864 865 if (pcbddc->NeumannBoundariesLocal) { 866 PetscInt nzb; 867 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 868 if (nzb) valid = PETSC_FALSE; 869 } 870 if (valid && pressures) { 871 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 872 } 873 if (valid && pcbddc->current_level) { 874 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 875 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 876 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 877 for (j=0;j<n_interior_dofs;j++) { 878 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 879 valid = PETSC_FALSE; 880 break; 881 } 882 } 883 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 884 } 885 if (valid) { 886 pcbddc->benign_n = 1; 887 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 888 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 889 zerodiag_subs[0] = zerodiag; 890 } 891 } 892 if (pcbddc->current_level) { 893 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 894 } 895 } 896 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 897 898 if (!pcbddc->benign_n) { 899 PetscInt n; 900 901 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 902 recompute_zerodiag = PETSC_FALSE; 903 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 904 if (n) { 905 has_null_pressures = PETSC_FALSE; 906 have_null = PETSC_FALSE; 907 } 908 } 909 910 /* final check for null pressures */ 911 if (zerodiag && pressures) { 912 PetscInt nz,np; 913 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 914 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 915 if (nz != np) have_null = PETSC_FALSE; 916 } 917 918 if (recompute_zerodiag) { 919 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 920 if (pcbddc->benign_n == 1) { 921 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 922 zerodiag = zerodiag_subs[0]; 923 } else { 924 PetscInt i,nzn,*new_idxs; 925 926 nzn = 0; 927 for (i=0;i<pcbddc->benign_n;i++) { 928 PetscInt ns; 929 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 930 nzn += ns; 931 } 932 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 933 nzn = 0; 934 for (i=0;i<pcbddc->benign_n;i++) { 935 PetscInt ns,*idxs; 936 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 937 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 938 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 939 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 940 nzn += ns; 941 } 942 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 943 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 944 } 945 have_null = PETSC_FALSE; 946 } 947 948 /* Prepare matrix to compute no-net-flux */ 949 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 950 Mat A,loc_divudotp; 951 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 952 IS row,col,isused = NULL; 953 PetscInt M,N,n,st,n_isused; 954 955 if (pressures) { 956 isused = pressures; 957 } else { 958 isused = zerodiag; 959 } 960 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 961 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 962 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 963 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 964 n_isused = 0; 965 if (isused) { 966 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 967 } 968 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 969 st = st-n_isused; 970 if (n) { 971 const PetscInt *gidxs; 972 973 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 974 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 975 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 976 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 977 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 978 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 979 } else { 980 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 981 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 983 } 984 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 985 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 986 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 987 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 988 ierr = ISDestroy(&row);CHKERRQ(ierr); 989 ierr = ISDestroy(&col);CHKERRQ(ierr); 990 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 991 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 992 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 993 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 994 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 995 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 996 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 997 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 998 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1000 } 1001 1002 /* change of basis and p0 dofs */ 1003 if (has_null_pressures) { 1004 IS zerodiagc; 1005 const PetscInt *idxs,*idxsc; 1006 PetscInt i,s,*nnz; 1007 1008 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1009 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1010 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1011 /* local change of basis for pressures */ 1012 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1013 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1014 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1015 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1016 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1017 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1018 for (i=0;i<pcbddc->benign_n;i++) { 1019 PetscInt nzs,j; 1020 1021 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1022 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1023 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1024 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1025 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1026 } 1027 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1028 ierr = PetscFree(nnz);CHKERRQ(ierr); 1029 /* set identity on velocities */ 1030 for (i=0;i<n-nz;i++) { 1031 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1032 } 1033 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1034 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1035 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1036 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1037 /* set change on pressures */ 1038 for (s=0;s<pcbddc->benign_n;s++) { 1039 PetscScalar *array; 1040 PetscInt nzs; 1041 1042 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1043 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1044 for (i=0;i<nzs-1;i++) { 1045 PetscScalar vals[2]; 1046 PetscInt cols[2]; 1047 1048 cols[0] = idxs[i]; 1049 cols[1] = idxs[nzs-1]; 1050 vals[0] = 1.; 1051 vals[1] = 1.; 1052 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1053 } 1054 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1055 for (i=0;i<nzs-1;i++) array[i] = -1.; 1056 array[nzs-1] = 1.; 1057 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1058 /* store local idxs for p0 */ 1059 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1060 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1061 ierr = PetscFree(array);CHKERRQ(ierr); 1062 } 1063 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1064 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1065 /* project if needed */ 1066 if (pcbddc->benign_change_explicit) { 1067 Mat M; 1068 1069 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1070 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1071 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1072 ierr = MatDestroy(&M);CHKERRQ(ierr); 1073 } 1074 /* store global idxs for p0 */ 1075 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1076 } 1077 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1078 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1079 1080 /* determines if the coarse solver will be singular or not */ 1081 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1082 /* determines if the problem has subdomains with 0 pressure block */ 1083 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1084 *zerodiaglocal = zerodiag; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1090 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1091 { 1092 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1093 PetscScalar *array; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 if (!pcbddc->benign_sf) { 1098 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1099 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1100 } 1101 if (get) { 1102 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1103 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1104 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1105 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1106 } else { 1107 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1108 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1109 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1110 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1117 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1118 { 1119 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 /* TODO: add error checking 1124 - avoid nested pop (or push) calls. 1125 - cannot push before pop. 1126 - cannot call this if pcbddc->local_mat is NULL 1127 */ 1128 if (!pcbddc->benign_n) { 1129 PetscFunctionReturn(0); 1130 } 1131 if (pop) { 1132 if (pcbddc->benign_change_explicit) { 1133 IS is_p0; 1134 MatReuse reuse; 1135 1136 /* extract B_0 */ 1137 reuse = MAT_INITIAL_MATRIX; 1138 if (pcbddc->benign_B0) { 1139 reuse = MAT_REUSE_MATRIX; 1140 } 1141 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1142 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1143 /* remove rows and cols from local problem */ 1144 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1145 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1146 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1147 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1148 } else { 1149 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1150 PetscScalar *vals; 1151 PetscInt i,n,*idxs_ins; 1152 1153 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1154 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1155 if (!pcbddc->benign_B0) { 1156 PetscInt *nnz; 1157 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1158 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1159 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1160 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1161 for (i=0;i<pcbddc->benign_n;i++) { 1162 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1163 nnz[i] = n - nnz[i]; 1164 } 1165 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1166 ierr = PetscFree(nnz);CHKERRQ(ierr); 1167 } 1168 1169 for (i=0;i<pcbddc->benign_n;i++) { 1170 PetscScalar *array; 1171 PetscInt *idxs,j,nz,cum; 1172 1173 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1174 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1175 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1176 for (j=0;j<nz;j++) vals[j] = 1.; 1177 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1178 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1179 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1180 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1181 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1182 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1183 cum = 0; 1184 for (j=0;j<n;j++) { 1185 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1186 vals[cum] = array[j]; 1187 idxs_ins[cum] = j; 1188 cum++; 1189 } 1190 } 1191 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1192 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1193 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1194 } 1195 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1196 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1197 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1198 } 1199 } else { /* push */ 1200 if (pcbddc->benign_change_explicit) { 1201 PetscInt i; 1202 1203 for (i=0;i<pcbddc->benign_n;i++) { 1204 PetscScalar *B0_vals; 1205 PetscInt *B0_cols,B0_ncol; 1206 1207 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1208 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1209 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1210 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1211 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1212 } 1213 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1214 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1215 } else { 1216 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1217 } 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1224 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1225 { 1226 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1227 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1228 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1229 PetscBLASInt *B_iwork,*B_ifail; 1230 PetscScalar *work,lwork; 1231 PetscScalar *St,*S,*eigv; 1232 PetscScalar *Sarray,*Starray; 1233 PetscReal *eigs,thresh; 1234 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1235 PetscBool allocated_S_St; 1236 #if defined(PETSC_USE_COMPLEX) 1237 PetscReal *rwork; 1238 #endif 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1243 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1244 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1245 1246 if (pcbddc->dbg_flag) { 1247 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1249 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1250 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1251 } 1252 1253 if (pcbddc->dbg_flag) { 1254 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1255 } 1256 1257 /* max size of subsets */ 1258 mss = 0; 1259 for (i=0;i<sub_schurs->n_subs;i++) { 1260 PetscInt subset_size; 1261 1262 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1263 mss = PetscMax(mss,subset_size); 1264 } 1265 1266 /* min/max and threshold */ 1267 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1268 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1269 nmax = PetscMax(nmin,nmax); 1270 allocated_S_St = PETSC_FALSE; 1271 if (nmin) { 1272 allocated_S_St = PETSC_TRUE; 1273 } 1274 1275 /* allocate lapack workspace */ 1276 cum = cum2 = 0; 1277 maxneigs = 0; 1278 for (i=0;i<sub_schurs->n_subs;i++) { 1279 PetscInt n,subset_size; 1280 1281 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1282 n = PetscMin(subset_size,nmax); 1283 cum += subset_size; 1284 cum2 += subset_size*n; 1285 maxneigs = PetscMax(maxneigs,n); 1286 } 1287 if (mss) { 1288 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1289 PetscBLASInt B_itype = 1; 1290 PetscBLASInt B_N = mss; 1291 PetscReal zero = 0.0; 1292 PetscReal eps = 0.0; /* dlamch? */ 1293 1294 B_lwork = -1; 1295 S = NULL; 1296 St = NULL; 1297 eigs = NULL; 1298 eigv = NULL; 1299 B_iwork = NULL; 1300 B_ifail = NULL; 1301 #if defined(PETSC_USE_COMPLEX) 1302 rwork = NULL; 1303 #endif 1304 thresh = 1.0; 1305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1306 #if defined(PETSC_USE_COMPLEX) 1307 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1308 #else 1309 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1310 #endif 1311 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1312 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1313 } else { 1314 /* TODO */ 1315 } 1316 } else { 1317 lwork = 0; 1318 } 1319 1320 nv = 0; 1321 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1322 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1323 } 1324 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1325 if (allocated_S_St) { 1326 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1327 } 1328 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1329 #if defined(PETSC_USE_COMPLEX) 1330 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1331 #endif 1332 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1333 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1334 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1335 nv+cum,&pcbddc->adaptive_constraints_idxs, 1336 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1337 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1338 1339 maxneigs = 0; 1340 cum = cumarray = 0; 1341 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1342 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1343 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1344 const PetscInt *idxs; 1345 1346 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1347 for (cum=0;cum<nv;cum++) { 1348 pcbddc->adaptive_constraints_n[cum] = 1; 1349 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1350 pcbddc->adaptive_constraints_data[cum] = 1.0; 1351 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1352 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1353 } 1354 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1355 } 1356 1357 if (mss) { /* multilevel */ 1358 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1359 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1360 } 1361 1362 thresh = pcbddc->adaptive_threshold; 1363 for (i=0;i<sub_schurs->n_subs;i++) { 1364 const PetscInt *idxs; 1365 PetscReal upper,lower; 1366 PetscInt j,subset_size,eigs_start = 0; 1367 PetscBLASInt B_N; 1368 PetscBool same_data = PETSC_FALSE; 1369 1370 if (pcbddc->use_deluxe_scaling) { 1371 upper = PETSC_MAX_REAL; 1372 lower = thresh; 1373 } else { 1374 upper = 1./thresh; 1375 lower = 0.; 1376 } 1377 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1378 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1379 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1380 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1381 if (sub_schurs->is_hermitian) { 1382 PetscInt j,k; 1383 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1384 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1385 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1386 } 1387 for (j=0;j<subset_size;j++) { 1388 for (k=j;k<subset_size;k++) { 1389 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1390 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1391 } 1392 } 1393 } else { 1394 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1395 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1396 } 1397 } else { 1398 S = Sarray + cumarray; 1399 St = Starray + cumarray; 1400 } 1401 /* see if we can save some work */ 1402 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1403 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1404 } 1405 1406 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1407 B_neigs = 0; 1408 } else { 1409 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1410 PetscBLASInt B_itype = 1; 1411 PetscBLASInt B_IL, B_IU; 1412 PetscReal eps = -1.0; /* dlamch? */ 1413 PetscInt nmin_s; 1414 PetscBool compute_range = PETSC_FALSE; 1415 1416 if (pcbddc->dbg_flag) { 1417 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1418 } 1419 1420 compute_range = PETSC_FALSE; 1421 if (thresh > 1.+PETSC_SMALL && !same_data) { 1422 compute_range = PETSC_TRUE; 1423 } 1424 1425 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1426 if (compute_range) { 1427 1428 /* ask for eigenvalues larger than thresh */ 1429 #if defined(PETSC_USE_COMPLEX) 1430 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1431 #else 1432 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1433 #endif 1434 } else if (!same_data) { 1435 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1436 B_IL = 1; 1437 #if defined(PETSC_USE_COMPLEX) 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1439 #else 1440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1441 #endif 1442 } else { /* same_data is true, so get the adaptive function requested by the user */ 1443 PetscInt k; 1444 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1445 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1446 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1447 nmin = nmax; 1448 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1449 for (k=0;k<nmax;k++) { 1450 eigs[k] = 1./PETSC_SMALL; 1451 eigv[k*(subset_size+1)] = 1.0; 1452 } 1453 } 1454 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1455 if (B_ierr) { 1456 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1457 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1458 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1459 } 1460 1461 if (B_neigs > nmax) { 1462 if (pcbddc->dbg_flag) { 1463 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1464 } 1465 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1466 B_neigs = nmax; 1467 } 1468 1469 nmin_s = PetscMin(nmin,B_N); 1470 if (B_neigs < nmin_s) { 1471 PetscBLASInt B_neigs2; 1472 1473 if (pcbddc->use_deluxe_scaling) { 1474 B_IL = B_N - nmin_s + 1; 1475 B_IU = B_N - B_neigs; 1476 } else { 1477 B_IL = B_neigs + 1; 1478 B_IU = nmin_s; 1479 } 1480 if (pcbddc->dbg_flag) { 1481 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1482 } 1483 if (sub_schurs->is_hermitian) { 1484 PetscInt j,k; 1485 for (j=0;j<subset_size;j++) { 1486 for (k=j;k<subset_size;k++) { 1487 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1488 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1489 } 1490 } 1491 } else { 1492 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1493 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1494 } 1495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1496 #if defined(PETSC_USE_COMPLEX) 1497 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1498 #else 1499 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1500 #endif 1501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1502 B_neigs += B_neigs2; 1503 } 1504 if (B_ierr) { 1505 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1506 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1507 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1508 } 1509 if (pcbddc->dbg_flag) { 1510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1511 for (j=0;j<B_neigs;j++) { 1512 if (eigs[j] == 0.0) { 1513 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1514 } else { 1515 if (pcbddc->use_deluxe_scaling) { 1516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1517 } else { 1518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1519 } 1520 } 1521 } 1522 } 1523 } else { 1524 /* TODO */ 1525 } 1526 } 1527 /* change the basis back to the original one */ 1528 if (sub_schurs->change) { 1529 Mat change,phi,phit; 1530 1531 if (pcbddc->dbg_flag > 1) { 1532 PetscInt ii; 1533 for (ii=0;ii<B_neigs;ii++) { 1534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1535 for (j=0;j<B_N;j++) { 1536 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1537 } 1538 } 1539 } 1540 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1542 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1543 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1544 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1545 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1546 } 1547 maxneigs = PetscMax(B_neigs,maxneigs); 1548 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1549 if (B_neigs) { 1550 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1551 1552 if (pcbddc->dbg_flag > 1) { 1553 PetscInt ii; 1554 for (ii=0;ii<B_neigs;ii++) { 1555 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1556 for (j=0;j<B_N;j++) { 1557 #if defined(PETSC_USE_COMPLEX) 1558 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1559 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1561 #else 1562 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1563 #endif 1564 } 1565 } 1566 } 1567 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1568 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1569 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1570 cum++; 1571 } 1572 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1573 /* shift for next computation */ 1574 cumarray += subset_size*subset_size; 1575 } 1576 if (pcbddc->dbg_flag) { 1577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1578 } 1579 1580 if (mss) { 1581 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1582 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1583 /* destroy matrices (junk) */ 1584 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1585 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1586 } 1587 if (allocated_S_St) { 1588 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1589 } 1590 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1591 #if defined(PETSC_USE_COMPLEX) 1592 ierr = PetscFree(rwork);CHKERRQ(ierr); 1593 #endif 1594 if (pcbddc->dbg_flag) { 1595 PetscInt maxneigs_r; 1596 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1597 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "PCBDDCSetUpSolvers" 1604 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1605 { 1606 PetscScalar *coarse_submat_vals; 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1611 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1612 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1613 1614 /* Setup local neumann solver ksp_R */ 1615 /* PCBDDCSetUpLocalScatters should be called first! */ 1616 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1617 1618 /* 1619 Setup local correction and local part of coarse basis. 1620 Gives back the dense local part of the coarse matrix in column major ordering 1621 */ 1622 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1623 1624 /* Compute total number of coarse nodes and setup coarse solver */ 1625 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1626 1627 /* free */ 1628 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "PCBDDCResetCustomization" 1634 PetscErrorCode PCBDDCResetCustomization(PC pc) 1635 { 1636 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1641 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1642 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1643 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1644 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1645 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1646 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1647 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1648 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1649 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1650 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "PCBDDCResetTopography" 1656 PetscErrorCode PCBDDCResetTopography(PC pc) 1657 { 1658 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1659 PetscInt i; 1660 PetscErrorCode ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1664 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1665 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1666 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1667 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1668 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1669 ierr = ISLocalToGlobalMappingDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1670 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_local_subs;i++) { 1672 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1673 } 1674 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1675 if (pcbddc->sub_schurs) { 1676 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "PCBDDCResetSolvers" 1683 PetscErrorCode PCBDDCResetSolvers(PC pc) 1684 { 1685 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1690 if (pcbddc->coarse_phi_B) { 1691 PetscScalar *array; 1692 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1693 ierr = PetscFree(array);CHKERRQ(ierr); 1694 } 1695 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1696 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1697 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1698 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1699 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1700 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1701 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1702 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1703 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1704 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1705 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1706 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1707 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1708 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1709 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1710 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1711 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1712 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1713 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1714 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1715 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1716 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1717 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1718 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1719 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1720 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1721 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1722 if (pcbddc->benign_zerodiag_subs) { 1723 PetscInt i; 1724 for (i=0;i<pcbddc->benign_n;i++) { 1725 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1726 } 1727 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1728 } 1729 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1735 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1736 { 1737 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1738 PC_IS *pcis = (PC_IS*)pc->data; 1739 VecType impVecType; 1740 PetscInt n_constraints,n_R,old_size; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1745 /* get sizes */ 1746 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1747 n_R = pcis->n - pcbddc->n_vertices; 1748 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1749 /* local work vectors (try to avoid unneeded work)*/ 1750 /* R nodes */ 1751 old_size = -1; 1752 if (pcbddc->vec1_R) { 1753 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1754 } 1755 if (n_R != old_size) { 1756 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1757 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1758 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1759 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1760 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1761 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1762 } 1763 /* local primal dofs */ 1764 old_size = -1; 1765 if (pcbddc->vec1_P) { 1766 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1767 } 1768 if (pcbddc->local_primal_size != old_size) { 1769 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1770 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1771 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1772 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1773 } 1774 /* local explicit constraints */ 1775 old_size = -1; 1776 if (pcbddc->vec1_C) { 1777 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1778 } 1779 if (n_constraints && n_constraints != old_size) { 1780 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1781 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1782 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1783 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1784 } 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "PCBDDCSetUpCorrection" 1790 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1791 { 1792 PetscErrorCode ierr; 1793 /* pointers to pcis and pcbddc */ 1794 PC_IS* pcis = (PC_IS*)pc->data; 1795 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1796 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1797 /* submatrices of local problem */ 1798 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1799 /* submatrices of local coarse problem */ 1800 Mat S_VV,S_CV,S_VC,S_CC; 1801 /* working matrices */ 1802 Mat C_CR; 1803 /* additional working stuff */ 1804 PC pc_R; 1805 Mat F; 1806 Vec dummy_vec; 1807 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1808 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1809 PetscScalar *work; 1810 PetscInt *idx_V_B; 1811 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1812 PetscInt i,n_R,n_D,n_B; 1813 1814 /* some shortcuts to scalars */ 1815 PetscScalar one=1.0,m_one=-1.0; 1816 1817 PetscFunctionBegin; 1818 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1819 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1820 } 1821 1822 /* Set Non-overlapping dimensions */ 1823 n_vertices = pcbddc->n_vertices; 1824 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1825 n_B = pcis->n_B; 1826 n_D = pcis->n - n_B; 1827 n_R = pcis->n - n_vertices; 1828 1829 /* vertices in boundary numbering */ 1830 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1831 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1832 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1833 1834 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1835 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1837 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1839 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1840 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1841 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1843 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1844 1845 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1846 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1847 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1848 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1849 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1850 lda_rhs = n_R; 1851 need_benign_correction = PETSC_FALSE; 1852 if (isLU || isILU || isCHOL) { 1853 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1854 } else if (sub_schurs && sub_schurs->reuse_solver) { 1855 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1856 MatFactorType type; 1857 1858 F = reuse_solver->F; 1859 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1860 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1861 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1862 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1863 } else { 1864 F = NULL; 1865 } 1866 1867 /* allocate workspace */ 1868 n = 0; 1869 if (n_constraints) { 1870 n += lda_rhs*n_constraints; 1871 } 1872 if (n_vertices) { 1873 n = PetscMax(2*lda_rhs*n_vertices,n); 1874 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1875 } 1876 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1877 1878 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1879 dummy_vec = NULL; 1880 if (need_benign_correction && lda_rhs != n_R && F) { 1881 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1882 } 1883 1884 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1885 if (n_constraints) { 1886 Mat M1,M2,M3,C_B; 1887 IS is_aux; 1888 PetscScalar *array,*array2; 1889 1890 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1891 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1892 1893 /* Extract constraints on R nodes: C_{CR} */ 1894 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1895 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1896 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1897 1898 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1899 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1900 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1901 for (i=0;i<n_constraints;i++) { 1902 const PetscScalar *row_cmat_values; 1903 const PetscInt *row_cmat_indices; 1904 PetscInt size_of_constraint,j; 1905 1906 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1907 for (j=0;j<size_of_constraint;j++) { 1908 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1909 } 1910 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1911 } 1912 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1913 if (F) { 1914 Mat B; 1915 1916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1917 if (need_benign_correction) { 1918 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1919 1920 /* rhs is already zero on interior dofs, no need to change the rhs */ 1921 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1922 } 1923 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1924 if (need_benign_correction) { 1925 PetscScalar *marr; 1926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1927 1928 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1929 if (lda_rhs != n_R) { 1930 for (i=0;i<n_constraints;i++) { 1931 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1932 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1933 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1934 } 1935 } else { 1936 for (i=0;i<n_constraints;i++) { 1937 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1938 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1939 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1940 } 1941 } 1942 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1943 } 1944 ierr = MatDestroy(&B);CHKERRQ(ierr); 1945 } else { 1946 PetscScalar *marr; 1947 1948 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1949 for (i=0;i<n_constraints;i++) { 1950 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1951 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1952 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1954 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1955 } 1956 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1957 } 1958 if (!pcbddc->switch_static) { 1959 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1960 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1961 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1962 for (i=0;i<n_constraints;i++) { 1963 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1964 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1965 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1966 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1967 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1968 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1969 } 1970 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1971 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1972 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1973 } else { 1974 if (lda_rhs != n_R) { 1975 IS dummy; 1976 1977 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1978 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1979 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1980 } else { 1981 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1982 pcbddc->local_auxmat2 = local_auxmat2_R; 1983 } 1984 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1985 } 1986 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1987 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1988 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1989 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1990 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1991 if (isCHOL) { 1992 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1995 } 1996 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1997 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1998 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1999 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2000 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2001 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2002 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2003 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2004 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2005 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2006 } 2007 2008 /* Get submatrices from subdomain matrix */ 2009 if (n_vertices) { 2010 IS is_aux; 2011 2012 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2013 IS tis; 2014 2015 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2016 ierr = ISSort(tis);CHKERRQ(ierr); 2017 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2018 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2019 } else { 2020 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2021 } 2022 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2023 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2024 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2025 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2026 } 2027 2028 /* Matrix of coarse basis functions (local) */ 2029 if (pcbddc->coarse_phi_B) { 2030 PetscInt on_B,on_primal,on_D=n_D; 2031 if (pcbddc->coarse_phi_D) { 2032 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2033 } 2034 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2035 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2036 PetscScalar *marray; 2037 2038 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2039 ierr = PetscFree(marray);CHKERRQ(ierr); 2040 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2041 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2042 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2043 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2044 } 2045 } 2046 2047 if (!pcbddc->coarse_phi_B) { 2048 PetscScalar *marray; 2049 2050 n = n_B*pcbddc->local_primal_size; 2051 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2052 n += n_D*pcbddc->local_primal_size; 2053 } 2054 if (!pcbddc->symmetric_primal) { 2055 n *= 2; 2056 } 2057 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2058 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2059 n = n_B*pcbddc->local_primal_size; 2060 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2061 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2062 n += n_D*pcbddc->local_primal_size; 2063 } 2064 if (!pcbddc->symmetric_primal) { 2065 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2067 n = n_B*pcbddc->local_primal_size; 2068 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2069 } 2070 } else { 2071 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2072 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2073 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2074 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2075 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2076 } 2077 } 2078 } 2079 2080 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2081 p0_lidx_I = NULL; 2082 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2083 const PetscInt *idxs; 2084 2085 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2086 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2087 for (i=0;i<pcbddc->benign_n;i++) { 2088 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2089 } 2090 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2091 } 2092 2093 /* vertices */ 2094 if (n_vertices) { 2095 2096 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2097 2098 if (n_R) { 2099 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2100 PetscBLASInt B_N,B_one = 1; 2101 PetscScalar *x,*y; 2102 PetscBool isseqaij; 2103 2104 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2105 if (need_benign_correction) { 2106 ISLocalToGlobalMapping RtoN; 2107 IS is_p0; 2108 PetscInt *idxs_p0,n; 2109 2110 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2111 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2112 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2113 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2114 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2115 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2116 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2117 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2118 } 2119 2120 if (lda_rhs == n_R) { 2121 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2122 } else { 2123 PetscScalar *av,*array; 2124 const PetscInt *xadj,*adjncy; 2125 PetscInt n; 2126 PetscBool flg_row; 2127 2128 array = work+lda_rhs*n_vertices; 2129 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2130 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2131 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2132 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2133 for (i=0;i<n;i++) { 2134 PetscInt j; 2135 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2136 } 2137 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2138 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2139 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2140 } 2141 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2142 if (need_benign_correction) { 2143 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2144 PetscScalar *marr; 2145 2146 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2147 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2148 2149 | 0 0 0 | (V) 2150 L = | 0 0 -1 | (P-p0) 2151 | 0 0 -1 | (p0) 2152 2153 */ 2154 for (i=0;i<reuse_solver->benign_n;i++) { 2155 const PetscScalar *vals; 2156 const PetscInt *idxs,*idxs_zero; 2157 PetscInt n,j,nz; 2158 2159 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2160 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2161 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2162 for (j=0;j<n;j++) { 2163 PetscScalar val = vals[j]; 2164 PetscInt k,col = idxs[j]; 2165 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2166 } 2167 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2168 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2169 } 2170 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2171 } 2172 if (F) { 2173 /* need to correct the rhs */ 2174 if (need_benign_correction) { 2175 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2176 PetscScalar *marr; 2177 2178 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2179 if (lda_rhs != n_R) { 2180 for (i=0;i<n_vertices;i++) { 2181 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2182 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2183 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2184 } 2185 } else { 2186 for (i=0;i<n_vertices;i++) { 2187 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2188 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2189 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2190 } 2191 } 2192 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2193 } 2194 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2195 /* need to correct the solution */ 2196 if (need_benign_correction) { 2197 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2198 PetscScalar *marr; 2199 2200 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2201 if (lda_rhs != n_R) { 2202 for (i=0;i<n_vertices;i++) { 2203 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2204 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2205 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2206 } 2207 } else { 2208 for (i=0;i<n_vertices;i++) { 2209 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2210 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2211 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2212 } 2213 } 2214 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2215 } 2216 } else { 2217 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2218 for (i=0;i<n_vertices;i++) { 2219 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2220 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2221 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2222 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2223 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2224 } 2225 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2226 } 2227 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2228 /* S_VV and S_CV */ 2229 if (n_constraints) { 2230 Mat B; 2231 2232 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2233 for (i=0;i<n_vertices;i++) { 2234 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2235 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2236 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2237 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2238 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2239 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2240 } 2241 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2242 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2243 ierr = MatDestroy(&B);CHKERRQ(ierr); 2244 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2245 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2246 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2247 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2248 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2249 ierr = MatDestroy(&B);CHKERRQ(ierr); 2250 } 2251 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2252 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2253 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2254 } 2255 if (lda_rhs != n_R) { 2256 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2257 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2258 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2259 } 2260 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2261 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2262 if (need_benign_correction) { 2263 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2264 PetscScalar *marr,*sums; 2265 2266 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2267 ierr = MatDenseGetArray(S_VVt,&marr); 2268 for (i=0;i<reuse_solver->benign_n;i++) { 2269 const PetscScalar *vals; 2270 const PetscInt *idxs,*idxs_zero; 2271 PetscInt n,j,nz; 2272 2273 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2274 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2275 for (j=0;j<n_vertices;j++) { 2276 PetscInt k; 2277 sums[j] = 0.; 2278 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2279 } 2280 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2281 for (j=0;j<n;j++) { 2282 PetscScalar val = vals[j]; 2283 PetscInt k; 2284 for (k=0;k<n_vertices;k++) { 2285 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2286 } 2287 } 2288 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2289 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2290 } 2291 ierr = PetscFree(sums);CHKERRQ(ierr); 2292 ierr = MatDenseRestoreArray(S_VVt,&marr); 2293 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2294 } 2295 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2296 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2297 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2298 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2299 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2300 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2301 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2302 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2303 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2304 } else { 2305 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2306 } 2307 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2308 2309 /* coarse basis functions */ 2310 for (i=0;i<n_vertices;i++) { 2311 PetscScalar *y; 2312 2313 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2314 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2315 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2316 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2317 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2318 y[n_B*i+idx_V_B[i]] = 1.0; 2319 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2320 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2321 2322 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2323 PetscInt j; 2324 2325 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2326 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2327 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2330 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2331 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2332 } 2333 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2334 } 2335 /* if n_R == 0 the object is not destroyed */ 2336 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2337 } 2338 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2339 2340 if (n_constraints) { 2341 Mat B; 2342 2343 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2344 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2345 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2346 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2347 if (n_vertices) { 2348 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2349 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2350 } else { 2351 Mat S_VCt; 2352 2353 if (lda_rhs != n_R) { 2354 ierr = MatDestroy(&B);CHKERRQ(ierr); 2355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2356 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2357 } 2358 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2359 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2360 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2361 } 2362 } 2363 ierr = MatDestroy(&B);CHKERRQ(ierr); 2364 /* coarse basis functions */ 2365 for (i=0;i<n_constraints;i++) { 2366 PetscScalar *y; 2367 2368 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2369 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2370 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2371 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2372 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2373 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2374 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2375 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2376 PetscInt j; 2377 2378 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2379 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2380 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2381 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2382 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2383 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2384 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2385 } 2386 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2387 } 2388 } 2389 if (n_constraints) { 2390 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2393 2394 /* coarse matrix entries relative to B_0 */ 2395 if (pcbddc->benign_n) { 2396 Mat B0_B,B0_BPHI; 2397 IS is_dummy; 2398 PetscScalar *data; 2399 PetscInt j; 2400 2401 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2402 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2403 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2404 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2405 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2406 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2407 for (j=0;j<pcbddc->benign_n;j++) { 2408 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2409 for (i=0;i<pcbddc->local_primal_size;i++) { 2410 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2411 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2412 } 2413 } 2414 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2415 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2416 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2417 } 2418 2419 /* compute other basis functions for non-symmetric problems */ 2420 if (!pcbddc->symmetric_primal) { 2421 Mat B_V=NULL,B_C=NULL; 2422 PetscScalar *marray; 2423 2424 if (n_constraints) { 2425 Mat S_CCT,C_CRT; 2426 2427 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2428 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2429 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2430 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2431 if (n_vertices) { 2432 Mat S_VCT; 2433 2434 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2435 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2436 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2437 } 2438 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2439 } else { 2440 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2441 } 2442 if (n_vertices && n_R) { 2443 PetscScalar *av,*marray; 2444 const PetscInt *xadj,*adjncy; 2445 PetscInt n; 2446 PetscBool flg_row; 2447 2448 /* B_V = B_V - A_VR^T */ 2449 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2450 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2451 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2452 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2453 for (i=0;i<n;i++) { 2454 PetscInt j; 2455 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2456 } 2457 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2458 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2459 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2460 } 2461 2462 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2463 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2464 for (i=0;i<n_vertices;i++) { 2465 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2466 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2467 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2468 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2469 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2470 } 2471 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2472 if (B_C) { 2473 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2474 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2475 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2476 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2477 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2478 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2479 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2480 } 2481 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2482 } 2483 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2484 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2485 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2486 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2487 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2488 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2489 } 2490 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2491 /* coarse basis functions */ 2492 for (i=0;i<pcbddc->local_primal_size;i++) { 2493 PetscScalar *y; 2494 2495 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2496 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2497 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2500 if (i<n_vertices) { 2501 y[n_B*i+idx_V_B[i]] = 1.0; 2502 } 2503 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2504 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2505 2506 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2507 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2508 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2512 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2513 } 2514 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2515 } 2516 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2517 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2518 } 2519 /* free memory */ 2520 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2521 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2522 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2523 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2524 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2525 ierr = PetscFree(work);CHKERRQ(ierr); 2526 if (n_vertices) { 2527 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2528 } 2529 if (n_constraints) { 2530 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2531 } 2532 /* Checking coarse_sub_mat and coarse basis functios */ 2533 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2534 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2535 if (pcbddc->dbg_flag) { 2536 Mat coarse_sub_mat; 2537 Mat AUXMAT,TM1,TM2,TM3,TM4; 2538 Mat coarse_phi_D,coarse_phi_B; 2539 Mat coarse_psi_D,coarse_psi_B; 2540 Mat A_II,A_BB,A_IB,A_BI; 2541 Mat C_B,CPHI; 2542 IS is_dummy; 2543 Vec mones; 2544 MatType checkmattype=MATSEQAIJ; 2545 PetscReal real_value; 2546 2547 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2548 Mat A; 2549 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2551 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2552 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2553 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2554 ierr = MatDestroy(&A);CHKERRQ(ierr); 2555 } else { 2556 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2557 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2558 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2559 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2560 } 2561 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2562 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2563 if (!pcbddc->symmetric_primal) { 2564 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2565 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2566 } 2567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2568 2569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2572 if (!pcbddc->symmetric_primal) { 2573 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2574 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2575 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2577 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2583 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2585 } else { 2586 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2587 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2588 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2589 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2591 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2592 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2594 } 2595 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2596 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2597 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2598 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2599 if (pcbddc->benign_n) { 2600 Mat B0_B,B0_BPHI; 2601 PetscScalar *data,*data2; 2602 PetscInt j; 2603 2604 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2605 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2606 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2607 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2608 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2609 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2610 for (j=0;j<pcbddc->benign_n;j++) { 2611 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2612 for (i=0;i<pcbddc->local_primal_size;i++) { 2613 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2614 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2615 } 2616 } 2617 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2618 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2619 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2620 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2621 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2622 } 2623 #if 0 2624 { 2625 PetscViewer viewer; 2626 char filename[256]; 2627 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2628 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2629 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2630 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2631 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2632 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2633 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2634 if (save_change) { 2635 Mat phi_B; 2636 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2637 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2638 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2639 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2640 } else { 2641 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2642 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2643 } 2644 if (pcbddc->coarse_phi_D) { 2645 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2646 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2647 } 2648 if (pcbddc->coarse_psi_B) { 2649 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2650 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2651 } 2652 if (pcbddc->coarse_psi_D) { 2653 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2654 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2655 } 2656 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2657 } 2658 #endif 2659 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2660 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2661 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2662 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2663 2664 /* check constraints */ 2665 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2666 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2667 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2668 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2669 } else { 2670 PetscScalar *data; 2671 Mat tmat; 2672 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2674 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2675 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2676 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2677 } 2678 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2679 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2680 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2681 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2683 if (!pcbddc->symmetric_primal) { 2684 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2685 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2686 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2687 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2689 } 2690 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2691 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2692 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2693 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2695 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2696 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2697 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2698 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2699 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2700 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2701 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2702 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2703 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2704 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2705 if (!pcbddc->symmetric_primal) { 2706 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2707 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2708 } 2709 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2710 } 2711 /* get back data */ 2712 *coarse_submat_vals_n = coarse_submat_vals; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2718 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2719 { 2720 Mat *work_mat; 2721 IS isrow_s,iscol_s; 2722 PetscBool rsorted,csorted; 2723 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2728 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2729 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2730 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2731 2732 if (!rsorted) { 2733 const PetscInt *idxs; 2734 PetscInt *idxs_sorted,i; 2735 2736 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2737 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2738 for (i=0;i<rsize;i++) { 2739 idxs_perm_r[i] = i; 2740 } 2741 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2742 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2743 for (i=0;i<rsize;i++) { 2744 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2745 } 2746 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2747 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2748 } else { 2749 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2750 isrow_s = isrow; 2751 } 2752 2753 if (!csorted) { 2754 if (isrow == iscol) { 2755 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2756 iscol_s = isrow_s; 2757 } else { 2758 const PetscInt *idxs; 2759 PetscInt *idxs_sorted,i; 2760 2761 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2762 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2763 for (i=0;i<csize;i++) { 2764 idxs_perm_c[i] = i; 2765 } 2766 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2767 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2768 for (i=0;i<csize;i++) { 2769 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2770 } 2771 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2772 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2773 } 2774 } else { 2775 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2776 iscol_s = iscol; 2777 } 2778 2779 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2780 2781 if (!rsorted || !csorted) { 2782 Mat new_mat; 2783 IS is_perm_r,is_perm_c; 2784 2785 if (!rsorted) { 2786 PetscInt *idxs_r,i; 2787 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2788 for (i=0;i<rsize;i++) { 2789 idxs_r[idxs_perm_r[i]] = i; 2790 } 2791 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2792 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2793 } else { 2794 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2795 } 2796 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2797 2798 if (!csorted) { 2799 if (isrow_s == iscol_s) { 2800 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2801 is_perm_c = is_perm_r; 2802 } else { 2803 PetscInt *idxs_c,i; 2804 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2805 for (i=0;i<csize;i++) { 2806 idxs_c[idxs_perm_c[i]] = i; 2807 } 2808 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2809 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2810 } 2811 } else { 2812 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2813 } 2814 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2815 2816 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2817 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2818 work_mat[0] = new_mat; 2819 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2820 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2821 } 2822 2823 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2824 *B = work_mat[0]; 2825 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2826 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2827 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2833 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2834 { 2835 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2836 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2837 Mat new_mat; 2838 IS is_local,is_global; 2839 PetscInt local_size; 2840 PetscBool isseqaij; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2845 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2846 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2847 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2848 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2849 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2850 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2851 2852 /* check */ 2853 if (pcbddc->dbg_flag) { 2854 Vec x,x_change; 2855 PetscReal error; 2856 2857 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2859 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2862 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2865 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2866 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2869 ierr = VecDestroy(&x);CHKERRQ(ierr); 2870 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2871 } 2872 2873 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2874 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2875 if (isseqaij) { 2876 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2877 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2878 } else { 2879 Mat work_mat; 2880 2881 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2882 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2883 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2884 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2885 } 2886 if (matis->A->symmetric_set) { 2887 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2888 #if !defined(PETSC_USE_COMPLEX) 2889 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2890 #endif 2891 } 2892 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2893 PetscFunctionReturn(0); 2894 } 2895 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2898 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2899 { 2900 PC_IS* pcis = (PC_IS*)(pc->data); 2901 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2903 PetscInt *idx_R_local=NULL; 2904 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2905 PetscInt vbs,bs; 2906 PetscBT bitmask=NULL; 2907 PetscErrorCode ierr; 2908 2909 PetscFunctionBegin; 2910 /* 2911 No need to setup local scatters if 2912 - primal space is unchanged 2913 AND 2914 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2915 AND 2916 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2917 */ 2918 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2919 PetscFunctionReturn(0); 2920 } 2921 /* destroy old objects */ 2922 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2923 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2924 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2925 /* Set Non-overlapping dimensions */ 2926 n_B = pcis->n_B; 2927 n_D = pcis->n - n_B; 2928 n_vertices = pcbddc->n_vertices; 2929 2930 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2931 2932 /* create auxiliary bitmask and allocate workspace */ 2933 if (!sub_schurs || !sub_schurs->reuse_solver) { 2934 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2935 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2936 for (i=0;i<n_vertices;i++) { 2937 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2938 } 2939 2940 for (i=0, n_R=0; i<pcis->n; i++) { 2941 if (!PetscBTLookup(bitmask,i)) { 2942 idx_R_local[n_R++] = i; 2943 } 2944 } 2945 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2947 2948 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2949 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2950 } 2951 2952 /* Block code */ 2953 vbs = 1; 2954 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2955 if (bs>1 && !(n_vertices%bs)) { 2956 PetscBool is_blocked = PETSC_TRUE; 2957 PetscInt *vary; 2958 if (!sub_schurs || !sub_schurs->reuse_solver) { 2959 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2960 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2961 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2962 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2963 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2964 for (i=0; i<pcis->n/bs; i++) { 2965 if (vary[i]!=0 && vary[i]!=bs) { 2966 is_blocked = PETSC_FALSE; 2967 break; 2968 } 2969 } 2970 ierr = PetscFree(vary);CHKERRQ(ierr); 2971 } else { 2972 /* Verify directly the R set */ 2973 for (i=0; i<n_R/bs; i++) { 2974 PetscInt j,node=idx_R_local[bs*i]; 2975 for (j=1; j<bs; j++) { 2976 if (node != idx_R_local[bs*i+j]-j) { 2977 is_blocked = PETSC_FALSE; 2978 break; 2979 } 2980 } 2981 } 2982 } 2983 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2984 vbs = bs; 2985 for (i=0;i<n_R/vbs;i++) { 2986 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2987 } 2988 } 2989 } 2990 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2991 if (sub_schurs && sub_schurs->reuse_solver) { 2992 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2993 2994 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2995 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2996 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2997 reuse_solver->is_R = pcbddc->is_R_local; 2998 } else { 2999 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3000 } 3001 3002 /* print some info if requested */ 3003 if (pcbddc->dbg_flag) { 3004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3011 } 3012 3013 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3014 if (!sub_schurs || !sub_schurs->reuse_solver) { 3015 IS is_aux1,is_aux2; 3016 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3017 3018 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3019 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3020 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3021 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3022 for (i=0; i<n_D; i++) { 3023 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3024 } 3025 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3026 for (i=0, j=0; i<n_R; i++) { 3027 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3028 aux_array1[j++] = i; 3029 } 3030 } 3031 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3032 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3033 for (i=0, j=0; i<n_B; i++) { 3034 if (!PetscBTLookup(bitmask,is_indices[i])) { 3035 aux_array2[j++] = i; 3036 } 3037 } 3038 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3040 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3041 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3042 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3043 3044 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3045 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3046 for (i=0, j=0; i<n_R; i++) { 3047 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3048 aux_array1[j++] = i; 3049 } 3050 } 3051 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3052 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3054 } 3055 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3056 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3057 } else { 3058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3059 IS tis; 3060 PetscInt schur_size; 3061 3062 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3063 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3064 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3065 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3067 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3068 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3069 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3070 } 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3078 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3079 { 3080 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3081 PC_IS *pcis = (PC_IS*)pc->data; 3082 PC pc_temp; 3083 Mat A_RR; 3084 MatReuse reuse; 3085 PetscScalar m_one = -1.0; 3086 PetscReal value; 3087 PetscInt n_D,n_R; 3088 PetscBool check_corr[2],issbaij; 3089 PetscErrorCode ierr; 3090 /* prefixes stuff */ 3091 char dir_prefix[256],neu_prefix[256],str_level[16]; 3092 size_t len; 3093 3094 PetscFunctionBegin; 3095 3096 /* compute prefixes */ 3097 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3098 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3099 if (!pcbddc->current_level) { 3100 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3101 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3102 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3103 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3106 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3107 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3108 len -= 15; /* remove "pc_bddc_coarse_" */ 3109 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3110 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3111 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3112 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3113 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3114 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3115 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3116 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3117 } 3118 3119 /* DIRICHLET PROBLEM */ 3120 if (dirichlet) { 3121 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3122 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3123 if (!sub_schurs || !sub_schurs->reuse_solver) { 3124 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3125 } 3126 if (pcbddc->dbg_flag) { 3127 Mat A_IIn; 3128 3129 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3130 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3131 pcis->A_II = A_IIn; 3132 } 3133 } 3134 if (pcbddc->local_mat->symmetric_set) { 3135 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3136 } 3137 /* Matrix for Dirichlet problem is pcis->A_II */ 3138 n_D = pcis->n - pcis->n_B; 3139 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3140 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3141 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3142 /* default */ 3143 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3144 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3145 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3146 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3147 if (issbaij) { 3148 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3149 } else { 3150 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3151 } 3152 /* Allow user's customization */ 3153 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3154 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3155 } 3156 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3157 if (sub_schurs && sub_schurs->reuse_solver) { 3158 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3159 3160 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3161 } 3162 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3163 if (!n_D) { 3164 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3165 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3166 } 3167 /* Set Up KSP for Dirichlet problem of BDDC */ 3168 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3169 /* set ksp_D into pcis data */ 3170 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3171 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3172 pcis->ksp_D = pcbddc->ksp_D; 3173 } 3174 3175 /* NEUMANN PROBLEM */ 3176 A_RR = 0; 3177 if (neumann) { 3178 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3179 PetscInt ibs,mbs; 3180 PetscBool issbaij; 3181 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3182 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3183 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3184 if (pcbddc->ksp_R) { /* already created ksp */ 3185 PetscInt nn_R; 3186 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3187 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3188 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3189 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3190 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3191 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3192 reuse = MAT_INITIAL_MATRIX; 3193 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3194 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3196 reuse = MAT_INITIAL_MATRIX; 3197 } else { /* safe to reuse the matrix */ 3198 reuse = MAT_REUSE_MATRIX; 3199 } 3200 } 3201 /* last check */ 3202 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3203 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3204 reuse = MAT_INITIAL_MATRIX; 3205 } 3206 } else { /* first time, so we need to create the matrix */ 3207 reuse = MAT_INITIAL_MATRIX; 3208 } 3209 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3210 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3211 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3212 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3213 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3214 if (matis->A == pcbddc->local_mat) { 3215 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3216 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3217 } else { 3218 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3219 } 3220 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3221 if (matis->A == pcbddc->local_mat) { 3222 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3223 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3224 } else { 3225 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3226 } 3227 } 3228 /* extract A_RR */ 3229 if (sub_schurs && sub_schurs->reuse_solver) { 3230 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3231 3232 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3233 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3234 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3235 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3236 } else { 3237 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3238 } 3239 } else { 3240 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3241 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3242 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3243 } 3244 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3245 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3246 } 3247 if (pcbddc->local_mat->symmetric_set) { 3248 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3249 } 3250 if (!pcbddc->ksp_R) { /* create object if not present */ 3251 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3252 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3253 /* default */ 3254 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3255 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3256 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3257 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3258 if (issbaij) { 3259 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3260 } else { 3261 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3262 } 3263 /* Allow user's customization */ 3264 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3265 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3266 } 3267 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3268 if (!n_R) { 3269 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3270 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3271 } 3272 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3273 /* Reuse solver if it is present */ 3274 if (sub_schurs && sub_schurs->reuse_solver) { 3275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3276 3277 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3278 } 3279 /* Set Up KSP for Neumann problem of BDDC */ 3280 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3281 } 3282 3283 if (pcbddc->dbg_flag) { 3284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3285 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3286 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3287 } 3288 3289 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3290 check_corr[0] = check_corr[1] = PETSC_FALSE; 3291 if (pcbddc->NullSpace_corr[0]) { 3292 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3293 } 3294 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3295 check_corr[0] = PETSC_TRUE; 3296 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3297 } 3298 if (neumann && pcbddc->NullSpace_corr[2]) { 3299 check_corr[1] = PETSC_TRUE; 3300 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3301 } 3302 3303 /* check Dirichlet and Neumann solvers */ 3304 if (pcbddc->dbg_flag) { 3305 if (dirichlet) { /* Dirichlet */ 3306 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3307 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3308 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3309 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3310 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3311 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3312 if (check_corr[0]) { 3313 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3314 } 3315 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3316 } 3317 if (neumann) { /* Neumann */ 3318 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3319 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3321 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3322 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3324 if (check_corr[1]) { 3325 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3326 } 3327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3328 } 3329 } 3330 /* free Neumann problem's matrix */ 3331 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3337 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3338 { 3339 PetscErrorCode ierr; 3340 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3341 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3342 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3343 3344 PetscFunctionBegin; 3345 if (!reuse_solver) { 3346 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3347 } 3348 if (!pcbddc->switch_static) { 3349 if (applytranspose && pcbddc->local_auxmat1) { 3350 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3351 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3352 } 3353 if (!reuse_solver) { 3354 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3355 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3356 } else { 3357 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3358 3359 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3361 } 3362 } else { 3363 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3367 if (applytranspose && pcbddc->local_auxmat1) { 3368 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3369 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3370 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3371 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3372 } 3373 } 3374 if (!reuse_solver || pcbddc->switch_static) { 3375 if (applytranspose) { 3376 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3377 } else { 3378 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3379 } 3380 } else { 3381 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3382 3383 if (applytranspose) { 3384 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3385 } else { 3386 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3387 } 3388 } 3389 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3390 if (!pcbddc->switch_static) { 3391 if (!reuse_solver) { 3392 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3393 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3394 } else { 3395 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3396 3397 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3398 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3399 } 3400 if (!applytranspose && pcbddc->local_auxmat1) { 3401 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3402 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3403 } 3404 } else { 3405 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 if (!applytranspose && pcbddc->local_auxmat1) { 3410 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3411 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3412 } 3413 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3424 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3425 { 3426 PetscErrorCode ierr; 3427 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3428 PC_IS* pcis = (PC_IS*) (pc->data); 3429 const PetscScalar zero = 0.0; 3430 3431 PetscFunctionBegin; 3432 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3433 if (!pcbddc->benign_apply_coarse_only) { 3434 if (applytranspose) { 3435 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3436 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3437 } else { 3438 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3439 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3440 } 3441 } else { 3442 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3443 } 3444 3445 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3446 if (pcbddc->benign_n) { 3447 PetscScalar *array; 3448 PetscInt j; 3449 3450 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3451 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3452 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3453 } 3454 3455 /* start communications from local primal nodes to rhs of coarse solver */ 3456 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3457 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3458 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3459 3460 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3461 if (pcbddc->coarse_ksp) { 3462 Mat coarse_mat; 3463 Vec rhs,sol; 3464 MatNullSpace nullsp; 3465 PetscBool isbddc = PETSC_FALSE; 3466 3467 if (pcbddc->benign_have_null) { 3468 PC coarse_pc; 3469 3470 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3471 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3472 /* we need to propagate to coarser levels the need for a possible benign correction */ 3473 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3474 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3475 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3476 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3477 } 3478 } 3479 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3480 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3481 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3482 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3483 if (nullsp) { 3484 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3485 } 3486 if (applytranspose) { 3487 if (pcbddc->benign_apply_coarse_only) { 3488 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3489 } else { 3490 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3491 } 3492 } else { 3493 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3494 PC coarse_pc; 3495 3496 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3497 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3498 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3499 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3500 } else { 3501 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3502 } 3503 } 3504 /* we don't need the benign correction at coarser levels anymore */ 3505 if (pcbddc->benign_have_null && isbddc) { 3506 PC coarse_pc; 3507 PC_BDDC* coarsepcbddc; 3508 3509 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3510 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3511 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3512 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3513 } 3514 if (nullsp) { 3515 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3516 } 3517 } 3518 3519 /* Local solution on R nodes */ 3520 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3521 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3522 } 3523 /* communications from coarse sol to local primal nodes */ 3524 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3525 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3526 3527 /* Sum contributions from the two levels */ 3528 if (!pcbddc->benign_apply_coarse_only) { 3529 if (applytranspose) { 3530 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3531 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3532 } else { 3533 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3534 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3535 } 3536 /* store p0 */ 3537 if (pcbddc->benign_n) { 3538 PetscScalar *array; 3539 PetscInt j; 3540 3541 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3542 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3543 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3544 } 3545 } else { /* expand the coarse solution */ 3546 if (applytranspose) { 3547 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3548 } else { 3549 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3550 } 3551 } 3552 PetscFunctionReturn(0); 3553 } 3554 3555 #undef __FUNCT__ 3556 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3557 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3558 { 3559 PetscErrorCode ierr; 3560 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3561 PetscScalar *array; 3562 Vec from,to; 3563 3564 PetscFunctionBegin; 3565 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3566 from = pcbddc->coarse_vec; 3567 to = pcbddc->vec1_P; 3568 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3569 Vec tvec; 3570 3571 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3572 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3573 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3574 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3575 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3576 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3577 } 3578 } else { /* from local to global -> put data in coarse right hand side */ 3579 from = pcbddc->vec1_P; 3580 to = pcbddc->coarse_vec; 3581 } 3582 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3583 PetscFunctionReturn(0); 3584 } 3585 3586 #undef __FUNCT__ 3587 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3588 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3589 { 3590 PetscErrorCode ierr; 3591 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3592 PetscScalar *array; 3593 Vec from,to; 3594 3595 PetscFunctionBegin; 3596 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3597 from = pcbddc->coarse_vec; 3598 to = pcbddc->vec1_P; 3599 } else { /* from local to global -> put data in coarse right hand side */ 3600 from = pcbddc->vec1_P; 3601 to = pcbddc->coarse_vec; 3602 } 3603 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3604 if (smode == SCATTER_FORWARD) { 3605 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3606 Vec tvec; 3607 3608 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3609 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3610 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3611 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3612 } 3613 } else { 3614 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3615 ierr = VecResetArray(from);CHKERRQ(ierr); 3616 } 3617 } 3618 PetscFunctionReturn(0); 3619 } 3620 3621 /* uncomment for testing purposes */ 3622 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3625 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3626 { 3627 PetscErrorCode ierr; 3628 PC_IS* pcis = (PC_IS*)(pc->data); 3629 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3630 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3631 /* one and zero */ 3632 PetscScalar one=1.0,zero=0.0; 3633 /* space to store constraints and their local indices */ 3634 PetscScalar *constraints_data; 3635 PetscInt *constraints_idxs,*constraints_idxs_B; 3636 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3637 PetscInt *constraints_n; 3638 /* iterators */ 3639 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3640 /* BLAS integers */ 3641 PetscBLASInt lwork,lierr; 3642 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3643 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3644 /* reuse */ 3645 PetscInt olocal_primal_size,olocal_primal_size_cc; 3646 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3647 /* change of basis */ 3648 PetscBool qr_needed; 3649 PetscBT change_basis,qr_needed_idx; 3650 /* auxiliary stuff */ 3651 PetscInt *nnz,*is_indices; 3652 PetscInt ncc; 3653 /* some quantities */ 3654 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3655 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3656 3657 PetscFunctionBegin; 3658 /* Destroy Mat objects computed previously */ 3659 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3660 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3661 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3662 /* save info on constraints from previous setup (if any) */ 3663 olocal_primal_size = pcbddc->local_primal_size; 3664 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3665 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3666 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3667 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3668 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3669 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3670 3671 if (!pcbddc->adaptive_selection) { 3672 IS ISForVertices,*ISForFaces,*ISForEdges; 3673 MatNullSpace nearnullsp; 3674 const Vec *nearnullvecs; 3675 Vec *localnearnullsp; 3676 PetscScalar *array; 3677 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3678 PetscBool nnsp_has_cnst; 3679 /* LAPACK working arrays for SVD or POD */ 3680 PetscBool skip_lapack,boolforchange; 3681 PetscScalar *work; 3682 PetscReal *singular_vals; 3683 #if defined(PETSC_USE_COMPLEX) 3684 PetscReal *rwork; 3685 #endif 3686 #if defined(PETSC_MISSING_LAPACK_GESVD) 3687 PetscScalar *temp_basis,*correlation_mat; 3688 #else 3689 PetscBLASInt dummy_int=1; 3690 PetscScalar dummy_scalar=1.; 3691 #endif 3692 3693 /* Get index sets for faces, edges and vertices from graph */ 3694 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3695 /* print some info */ 3696 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3697 PetscInt nv; 3698 3699 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3700 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3701 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3707 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3708 } 3709 3710 /* free unneeded index sets */ 3711 if (!pcbddc->use_vertices) { 3712 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3713 } 3714 if (!pcbddc->use_edges) { 3715 for (i=0;i<n_ISForEdges;i++) { 3716 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3719 n_ISForEdges = 0; 3720 } 3721 if (!pcbddc->use_faces) { 3722 for (i=0;i<n_ISForFaces;i++) { 3723 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3724 } 3725 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3726 n_ISForFaces = 0; 3727 } 3728 3729 /* check if near null space is attached to global mat */ 3730 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3731 if (nearnullsp) { 3732 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3733 /* remove any stored info */ 3734 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3735 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3736 /* store information for BDDC solver reuse */ 3737 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3738 pcbddc->onearnullspace = nearnullsp; 3739 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3740 for (i=0;i<nnsp_size;i++) { 3741 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3742 } 3743 } else { /* if near null space is not provided BDDC uses constants by default */ 3744 nnsp_size = 0; 3745 nnsp_has_cnst = PETSC_TRUE; 3746 } 3747 /* get max number of constraints on a single cc */ 3748 max_constraints = nnsp_size; 3749 if (nnsp_has_cnst) max_constraints++; 3750 3751 /* 3752 Evaluate maximum storage size needed by the procedure 3753 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3754 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3755 There can be multiple constraints per connected component 3756 */ 3757 n_vertices = 0; 3758 if (ISForVertices) { 3759 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3760 } 3761 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3762 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3763 3764 total_counts = n_ISForFaces+n_ISForEdges; 3765 total_counts *= max_constraints; 3766 total_counts += n_vertices; 3767 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3768 3769 total_counts = 0; 3770 max_size_of_constraint = 0; 3771 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3772 IS used_is; 3773 if (i<n_ISForEdges) { 3774 used_is = ISForEdges[i]; 3775 } else { 3776 used_is = ISForFaces[i-n_ISForEdges]; 3777 } 3778 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3779 total_counts += j; 3780 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3781 } 3782 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3783 3784 /* get local part of global near null space vectors */ 3785 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3786 for (k=0;k<nnsp_size;k++) { 3787 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3788 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3789 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3790 } 3791 3792 /* whether or not to skip lapack calls */ 3793 skip_lapack = PETSC_TRUE; 3794 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3795 3796 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3797 if (!skip_lapack) { 3798 PetscScalar temp_work; 3799 3800 #if defined(PETSC_MISSING_LAPACK_GESVD) 3801 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3802 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3803 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3804 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3805 #if defined(PETSC_USE_COMPLEX) 3806 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3807 #endif 3808 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3809 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3810 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3811 lwork = -1; 3812 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3813 #if !defined(PETSC_USE_COMPLEX) 3814 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3815 #else 3816 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3817 #endif 3818 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3819 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3820 #else /* on missing GESVD */ 3821 /* SVD */ 3822 PetscInt max_n,min_n; 3823 max_n = max_size_of_constraint; 3824 min_n = max_constraints; 3825 if (max_size_of_constraint < max_constraints) { 3826 min_n = max_size_of_constraint; 3827 max_n = max_constraints; 3828 } 3829 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3830 #if defined(PETSC_USE_COMPLEX) 3831 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3832 #endif 3833 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3834 lwork = -1; 3835 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3836 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3837 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3838 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3839 #if !defined(PETSC_USE_COMPLEX) 3840 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3841 #else 3842 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3843 #endif 3844 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3845 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3846 #endif /* on missing GESVD */ 3847 /* Allocate optimal workspace */ 3848 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3849 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3850 } 3851 /* Now we can loop on constraining sets */ 3852 total_counts = 0; 3853 constraints_idxs_ptr[0] = 0; 3854 constraints_data_ptr[0] = 0; 3855 /* vertices */ 3856 if (n_vertices) { 3857 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3858 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3859 for (i=0;i<n_vertices;i++) { 3860 constraints_n[total_counts] = 1; 3861 constraints_data[total_counts] = 1.0; 3862 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3863 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3864 total_counts++; 3865 } 3866 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3867 n_vertices = total_counts; 3868 } 3869 3870 /* edges and faces */ 3871 total_counts_cc = total_counts; 3872 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3873 IS used_is; 3874 PetscBool idxs_copied = PETSC_FALSE; 3875 3876 if (ncc<n_ISForEdges) { 3877 used_is = ISForEdges[ncc]; 3878 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3879 } else { 3880 used_is = ISForFaces[ncc-n_ISForEdges]; 3881 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3882 } 3883 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3884 3885 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3886 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3887 /* change of basis should not be performed on local periodic nodes */ 3888 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3889 if (nnsp_has_cnst) { 3890 PetscScalar quad_value; 3891 3892 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3893 idxs_copied = PETSC_TRUE; 3894 3895 if (!pcbddc->use_nnsp_true) { 3896 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3897 } else { 3898 quad_value = 1.0; 3899 } 3900 for (j=0;j<size_of_constraint;j++) { 3901 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3902 } 3903 temp_constraints++; 3904 total_counts++; 3905 } 3906 for (k=0;k<nnsp_size;k++) { 3907 PetscReal real_value; 3908 PetscScalar *ptr_to_data; 3909 3910 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3911 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3912 for (j=0;j<size_of_constraint;j++) { 3913 ptr_to_data[j] = array[is_indices[j]]; 3914 } 3915 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3916 /* check if array is null on the connected component */ 3917 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3918 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3919 if (real_value > 0.0) { /* keep indices and values */ 3920 temp_constraints++; 3921 total_counts++; 3922 if (!idxs_copied) { 3923 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3924 idxs_copied = PETSC_TRUE; 3925 } 3926 } 3927 } 3928 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3929 valid_constraints = temp_constraints; 3930 if (!pcbddc->use_nnsp_true && temp_constraints) { 3931 if (temp_constraints == 1) { /* just normalize the constraint */ 3932 PetscScalar norm,*ptr_to_data; 3933 3934 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3936 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3937 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3938 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3939 } else { /* perform SVD */ 3940 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3941 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3942 3943 #if defined(PETSC_MISSING_LAPACK_GESVD) 3944 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3945 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3946 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3947 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3948 from that computed using LAPACKgesvd 3949 -> This is due to a different computation of eigenvectors in LAPACKheev 3950 -> The quality of the POD-computed basis will be the same */ 3951 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3952 /* Store upper triangular part of correlation matrix */ 3953 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3954 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3955 for (j=0;j<temp_constraints;j++) { 3956 for (k=0;k<j+1;k++) { 3957 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3958 } 3959 } 3960 /* compute eigenvalues and eigenvectors of correlation matrix */ 3961 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3963 #if !defined(PETSC_USE_COMPLEX) 3964 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3965 #else 3966 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3967 #endif 3968 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3969 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3970 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3971 j = 0; 3972 while (j < temp_constraints && singular_vals[j] < tol) j++; 3973 total_counts = total_counts-j; 3974 valid_constraints = temp_constraints-j; 3975 /* scale and copy POD basis into used quadrature memory */ 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3982 if (j<temp_constraints) { 3983 PetscInt ii; 3984 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3986 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3987 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3988 for (k=0;k<temp_constraints-j;k++) { 3989 for (ii=0;ii<size_of_constraint;ii++) { 3990 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3991 } 3992 } 3993 } 3994 #else /* on missing GESVD */ 3995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3996 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3998 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3999 #if !defined(PETSC_USE_COMPLEX) 4000 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4001 #else 4002 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4003 #endif 4004 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4005 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4006 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4007 k = temp_constraints; 4008 if (k > size_of_constraint) k = size_of_constraint; 4009 j = 0; 4010 while (j < k && singular_vals[k-j-1] < tol) j++; 4011 valid_constraints = k-j; 4012 total_counts = total_counts-temp_constraints+valid_constraints; 4013 #endif /* on missing GESVD */ 4014 } 4015 } 4016 /* update pointers information */ 4017 if (valid_constraints) { 4018 constraints_n[total_counts_cc] = valid_constraints; 4019 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4020 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4021 /* set change_of_basis flag */ 4022 if (boolforchange) { 4023 PetscBTSet(change_basis,total_counts_cc); 4024 } 4025 total_counts_cc++; 4026 } 4027 } 4028 /* free workspace */ 4029 if (!skip_lapack) { 4030 ierr = PetscFree(work);CHKERRQ(ierr); 4031 #if defined(PETSC_USE_COMPLEX) 4032 ierr = PetscFree(rwork);CHKERRQ(ierr); 4033 #endif 4034 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4035 #if defined(PETSC_MISSING_LAPACK_GESVD) 4036 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4037 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4038 #endif 4039 } 4040 for (k=0;k<nnsp_size;k++) { 4041 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4042 } 4043 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4044 /* free index sets of faces, edges and vertices */ 4045 for (i=0;i<n_ISForFaces;i++) { 4046 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4047 } 4048 if (n_ISForFaces) { 4049 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4050 } 4051 for (i=0;i<n_ISForEdges;i++) { 4052 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4053 } 4054 if (n_ISForEdges) { 4055 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4056 } 4057 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4058 } else { 4059 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4060 4061 total_counts = 0; 4062 n_vertices = 0; 4063 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4064 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4065 } 4066 max_constraints = 0; 4067 total_counts_cc = 0; 4068 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4069 total_counts += pcbddc->adaptive_constraints_n[i]; 4070 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4071 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4072 } 4073 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4074 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4075 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4076 constraints_data = pcbddc->adaptive_constraints_data; 4077 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4078 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4079 total_counts_cc = 0; 4080 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4081 if (pcbddc->adaptive_constraints_n[i]) { 4082 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4083 } 4084 } 4085 #if 0 4086 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4087 for (i=0;i<total_counts_cc;i++) { 4088 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4089 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4090 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4091 printf(" %d",constraints_idxs[j]); 4092 } 4093 printf("\n"); 4094 printf("number of cc: %d\n",constraints_n[i]); 4095 } 4096 for (i=0;i<n_vertices;i++) { 4097 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4098 } 4099 for (i=0;i<sub_schurs->n_subs;i++) { 4100 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4101 } 4102 #endif 4103 4104 max_size_of_constraint = 0; 4105 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4106 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4107 /* Change of basis */ 4108 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4109 if (pcbddc->use_change_of_basis) { 4110 for (i=0;i<sub_schurs->n_subs;i++) { 4111 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4112 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4113 } 4114 } 4115 } 4116 } 4117 pcbddc->local_primal_size = total_counts; 4118 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4119 4120 /* map constraints_idxs in boundary numbering */ 4121 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4122 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4123 4124 /* Create constraint matrix */ 4125 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4126 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4127 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4128 4129 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4130 /* determine if a QR strategy is needed for change of basis */ 4131 qr_needed = PETSC_FALSE; 4132 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4133 total_primal_vertices=0; 4134 pcbddc->local_primal_size_cc = 0; 4135 for (i=0;i<total_counts_cc;i++) { 4136 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4137 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4138 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4139 pcbddc->local_primal_size_cc += 1; 4140 } else if (PetscBTLookup(change_basis,i)) { 4141 for (k=0;k<constraints_n[i];k++) { 4142 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4143 } 4144 pcbddc->local_primal_size_cc += constraints_n[i]; 4145 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4146 PetscBTSet(qr_needed_idx,i); 4147 qr_needed = PETSC_TRUE; 4148 } 4149 } else { 4150 pcbddc->local_primal_size_cc += 1; 4151 } 4152 } 4153 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4154 pcbddc->n_vertices = total_primal_vertices; 4155 /* permute indices in order to have a sorted set of vertices */ 4156 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4157 4158 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4159 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4160 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4161 4162 /* nonzero structure of constraint matrix */ 4163 /* and get reference dof for local constraints */ 4164 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4165 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4166 4167 j = total_primal_vertices; 4168 total_counts = total_primal_vertices; 4169 cum = total_primal_vertices; 4170 for (i=n_vertices;i<total_counts_cc;i++) { 4171 if (!PetscBTLookup(change_basis,i)) { 4172 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4173 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4174 cum++; 4175 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4176 for (k=0;k<constraints_n[i];k++) { 4177 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4178 nnz[j+k] = size_of_constraint; 4179 } 4180 j += constraints_n[i]; 4181 } 4182 } 4183 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4184 ierr = PetscFree(nnz);CHKERRQ(ierr); 4185 4186 /* set values in constraint matrix */ 4187 for (i=0;i<total_primal_vertices;i++) { 4188 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4189 } 4190 total_counts = total_primal_vertices; 4191 for (i=n_vertices;i<total_counts_cc;i++) { 4192 if (!PetscBTLookup(change_basis,i)) { 4193 PetscInt *cols; 4194 4195 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4196 cols = constraints_idxs+constraints_idxs_ptr[i]; 4197 for (k=0;k<constraints_n[i];k++) { 4198 PetscInt row = total_counts+k; 4199 PetscScalar *vals; 4200 4201 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4202 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4203 } 4204 total_counts += constraints_n[i]; 4205 } 4206 } 4207 /* assembling */ 4208 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4209 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4210 4211 /* 4212 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4213 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4214 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4215 */ 4216 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4217 if (pcbddc->use_change_of_basis) { 4218 /* dual and primal dofs on a single cc */ 4219 PetscInt dual_dofs,primal_dofs; 4220 /* working stuff for GEQRF */ 4221 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4222 PetscBLASInt lqr_work; 4223 /* working stuff for UNGQR */ 4224 PetscScalar *gqr_work,lgqr_work_t; 4225 PetscBLASInt lgqr_work; 4226 /* working stuff for TRTRS */ 4227 PetscScalar *trs_rhs; 4228 PetscBLASInt Blas_NRHS; 4229 /* pointers for values insertion into change of basis matrix */ 4230 PetscInt *start_rows,*start_cols; 4231 PetscScalar *start_vals; 4232 /* working stuff for values insertion */ 4233 PetscBT is_primal; 4234 PetscInt *aux_primal_numbering_B; 4235 /* matrix sizes */ 4236 PetscInt global_size,local_size; 4237 /* temporary change of basis */ 4238 Mat localChangeOfBasisMatrix; 4239 /* extra space for debugging */ 4240 PetscScalar *dbg_work; 4241 4242 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4243 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4244 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4245 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4246 /* nonzeros for local mat */ 4247 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4248 if (!pcbddc->benign_change || pcbddc->fake_change) { 4249 for (i=0;i<pcis->n;i++) nnz[i]=1; 4250 } else { 4251 const PetscInt *ii; 4252 PetscInt n; 4253 PetscBool flg_row; 4254 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4255 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4256 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4257 } 4258 for (i=n_vertices;i<total_counts_cc;i++) { 4259 if (PetscBTLookup(change_basis,i)) { 4260 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4261 if (PetscBTLookup(qr_needed_idx,i)) { 4262 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4263 } else { 4264 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4265 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4266 } 4267 } 4268 } 4269 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4270 ierr = PetscFree(nnz);CHKERRQ(ierr); 4271 /* Set interior change in the matrix */ 4272 if (!pcbddc->benign_change || pcbddc->fake_change) { 4273 for (i=0;i<pcis->n;i++) { 4274 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4275 } 4276 } else { 4277 const PetscInt *ii,*jj; 4278 PetscScalar *aa; 4279 PetscInt n; 4280 PetscBool flg_row; 4281 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4282 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4283 for (i=0;i<n;i++) { 4284 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4285 } 4286 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4287 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4288 } 4289 4290 if (pcbddc->dbg_flag) { 4291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4293 } 4294 4295 4296 /* Now we loop on the constraints which need a change of basis */ 4297 /* 4298 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4299 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4300 4301 Basic blocks of change of basis matrix T computed by 4302 4303 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4304 4305 | 1 0 ... 0 s_1/S | 4306 | 0 1 ... 0 s_2/S | 4307 | ... | 4308 | 0 ... 1 s_{n-1}/S | 4309 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4310 4311 with S = \sum_{i=1}^n s_i^2 4312 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4313 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4314 4315 - QR decomposition of constraints otherwise 4316 */ 4317 if (qr_needed) { 4318 /* space to store Q */ 4319 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4320 /* first we issue queries for optimal work */ 4321 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4322 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4324 lqr_work = -1; 4325 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4326 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4327 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4328 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4329 lgqr_work = -1; 4330 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4332 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4333 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4334 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4335 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4336 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4337 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4338 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4339 /* array to store scaling factors for reflectors */ 4340 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4341 /* array to store rhs and solution of triangular solver */ 4342 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4343 /* allocating workspace for check */ 4344 if (pcbddc->dbg_flag) { 4345 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4346 } 4347 } 4348 /* array to store whether a node is primal or not */ 4349 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4350 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4351 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4352 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4353 for (i=0;i<total_primal_vertices;i++) { 4354 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4355 } 4356 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4357 4358 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4359 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4360 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4361 if (PetscBTLookup(change_basis,total_counts)) { 4362 /* get constraint info */ 4363 primal_dofs = constraints_n[total_counts]; 4364 dual_dofs = size_of_constraint-primal_dofs; 4365 4366 if (pcbddc->dbg_flag) { 4367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4368 } 4369 4370 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4371 4372 /* copy quadrature constraints for change of basis check */ 4373 if (pcbddc->dbg_flag) { 4374 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4375 } 4376 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4377 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4378 4379 /* compute QR decomposition of constraints */ 4380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4381 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4383 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4384 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4385 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4386 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4387 4388 /* explictly compute R^-T */ 4389 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4390 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4391 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4392 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4393 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4396 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4398 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4399 4400 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4402 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4403 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4405 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4406 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4407 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4408 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4409 4410 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4411 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4412 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4414 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4417 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4419 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4420 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4421 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4422 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4423 4424 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4425 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4426 /* insert cols for primal dofs */ 4427 for (j=0;j<primal_dofs;j++) { 4428 start_vals = &qr_basis[j*size_of_constraint]; 4429 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4430 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4431 } 4432 /* insert cols for dual dofs */ 4433 for (j=0,k=0;j<dual_dofs;k++) { 4434 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4435 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4436 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4437 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4438 j++; 4439 } 4440 } 4441 4442 /* check change of basis */ 4443 if (pcbddc->dbg_flag) { 4444 PetscInt ii,jj; 4445 PetscBool valid_qr=PETSC_TRUE; 4446 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4449 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4450 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4451 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4452 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4453 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4454 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4455 for (jj=0;jj<size_of_constraint;jj++) { 4456 for (ii=0;ii<primal_dofs;ii++) { 4457 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4458 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4459 } 4460 } 4461 if (!valid_qr) { 4462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4463 for (jj=0;jj<size_of_constraint;jj++) { 4464 for (ii=0;ii<primal_dofs;ii++) { 4465 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4466 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4467 } 4468 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4470 } 4471 } 4472 } 4473 } else { 4474 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4475 } 4476 } 4477 } else { /* simple transformation block */ 4478 PetscInt row,col; 4479 PetscScalar val,norm; 4480 4481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4482 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4483 for (j=0;j<size_of_constraint;j++) { 4484 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4485 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4486 if (!PetscBTLookup(is_primal,row_B)) { 4487 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4488 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4489 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4490 } else { 4491 for (k=0;k<size_of_constraint;k++) { 4492 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4493 if (row != col) { 4494 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4495 } else { 4496 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4497 } 4498 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4499 } 4500 } 4501 } 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4504 } 4505 } 4506 } else { 4507 if (pcbddc->dbg_flag) { 4508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4509 } 4510 } 4511 } 4512 4513 /* free workspace */ 4514 if (qr_needed) { 4515 if (pcbddc->dbg_flag) { 4516 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4517 } 4518 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4519 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4520 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4521 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4522 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4523 } 4524 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4525 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4527 4528 /* assembling of global change of variable */ 4529 if (!pcbddc->fake_change) { 4530 Mat tmat; 4531 PetscInt bs; 4532 4533 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4534 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4535 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4536 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4537 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4538 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4539 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4540 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4541 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4542 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4543 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4544 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4546 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4547 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4548 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4549 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4550 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4551 4552 /* check */ 4553 if (pcbddc->dbg_flag) { 4554 PetscReal error; 4555 Vec x,x_change; 4556 4557 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4558 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4559 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4560 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4561 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4562 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4563 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4564 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4565 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4566 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4567 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4568 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4571 ierr = VecDestroy(&x);CHKERRQ(ierr); 4572 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4573 } 4574 /* adapt sub_schurs computed (if any) */ 4575 if (pcbddc->use_deluxe_scaling) { 4576 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4577 4578 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4579 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4580 } 4581 if (sub_schurs && sub_schurs->S_Ej_all) { 4582 Mat S_new,tmat; 4583 IS is_all_N,is_V_Sall = NULL; 4584 4585 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4586 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4587 if (pcbddc->deluxe_zerorows) { 4588 ISLocalToGlobalMapping NtoSall; 4589 IS is_V; 4590 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4591 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4592 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4593 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4594 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4595 } 4596 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4597 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4598 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4599 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4600 if (pcbddc->deluxe_zerorows) { 4601 const PetscScalar *array; 4602 const PetscInt *idxs_V,*idxs_all; 4603 PetscInt i,n_V; 4604 4605 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4606 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4607 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4608 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4609 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4610 for (i=0;i<n_V;i++) { 4611 PetscScalar val; 4612 PetscInt idx; 4613 4614 idx = idxs_V[i]; 4615 val = array[idxs_all[idxs_V[i]]]; 4616 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4617 } 4618 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4621 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4622 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4623 } 4624 sub_schurs->S_Ej_all = S_new; 4625 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4626 if (sub_schurs->sum_S_Ej_all) { 4627 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4628 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4629 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4630 if (pcbddc->deluxe_zerorows) { 4631 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4632 } 4633 sub_schurs->sum_S_Ej_all = S_new; 4634 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4635 } 4636 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4637 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4638 } 4639 /* destroy any change of basis context in sub_schurs */ 4640 if (sub_schurs && sub_schurs->change) { 4641 PetscInt i; 4642 4643 for (i=0;i<sub_schurs->n_subs;i++) { 4644 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4645 } 4646 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4647 } 4648 } 4649 if (pcbddc->switch_static) { /* need to save the local change */ 4650 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4651 } else { 4652 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4653 } 4654 /* determine if any process has changed the pressures locally */ 4655 pcbddc->change_interior = pcbddc->benign_have_null; 4656 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4657 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4658 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4659 pcbddc->use_qr_single = qr_needed; 4660 } 4661 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4662 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4663 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4664 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4665 } else { 4666 Mat benign_global = NULL; 4667 if (pcbddc->benign_have_null) { 4668 Mat tmat; 4669 4670 pcbddc->change_interior = PETSC_TRUE; 4671 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4672 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4673 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4674 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4675 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4676 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4677 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4678 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4679 if (pcbddc->benign_change) { 4680 Mat M; 4681 4682 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4683 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4684 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4685 ierr = MatDestroy(&M);CHKERRQ(ierr); 4686 } else { 4687 Mat eye; 4688 PetscScalar *array; 4689 4690 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4691 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4692 for (i=0;i<pcis->n;i++) { 4693 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4694 } 4695 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4696 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4697 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4699 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4700 } 4701 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4702 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4703 } 4704 if (pcbddc->user_ChangeOfBasisMatrix) { 4705 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4706 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4707 } else if (pcbddc->benign_have_null) { 4708 pcbddc->ChangeOfBasisMatrix = benign_global; 4709 } 4710 } 4711 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4712 IS is_global; 4713 const PetscInt *gidxs; 4714 4715 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4716 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4717 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4718 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4719 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4720 } 4721 } 4722 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4723 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4724 } 4725 4726 if (!pcbddc->fake_change) { 4727 /* add pressure dofs to set of primal nodes for numbering purposes */ 4728 for (i=0;i<pcbddc->benign_n;i++) { 4729 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4730 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4731 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4732 pcbddc->local_primal_size_cc++; 4733 pcbddc->local_primal_size++; 4734 } 4735 4736 /* check if a new primal space has been introduced (also take into account benign trick) */ 4737 pcbddc->new_primal_space_local = PETSC_TRUE; 4738 if (olocal_primal_size == pcbddc->local_primal_size) { 4739 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4740 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4741 if (!pcbddc->new_primal_space_local) { 4742 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4743 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4744 } 4745 } 4746 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4747 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4748 } 4749 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4750 4751 /* flush dbg viewer */ 4752 if (pcbddc->dbg_flag) { 4753 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4754 } 4755 4756 /* free workspace */ 4757 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4758 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4759 if (!pcbddc->adaptive_selection) { 4760 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4761 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4762 } else { 4763 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4764 pcbddc->adaptive_constraints_idxs_ptr, 4765 pcbddc->adaptive_constraints_data_ptr, 4766 pcbddc->adaptive_constraints_idxs, 4767 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4768 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4769 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4770 } 4771 PetscFunctionReturn(0); 4772 } 4773 4774 #undef __FUNCT__ 4775 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4776 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4777 { 4778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4779 PC_IS *pcis = (PC_IS*)pc->data; 4780 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4781 PetscInt ierr,i,N; 4782 4783 PetscFunctionBegin; 4784 /* Reset previously computed graph */ 4785 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4786 /* Init local Graph struct */ 4787 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4788 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4789 4790 /* Check validity of the csr graph passed in by the user */ 4791 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4792 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4793 } 4794 4795 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4796 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4797 PetscInt *xadj,*adjncy; 4798 PetscInt nvtxs; 4799 PetscBool flg_row=PETSC_FALSE; 4800 4801 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4802 if (flg_row) { 4803 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4804 pcbddc->computed_rowadj = PETSC_TRUE; 4805 } 4806 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4807 } 4808 if (pcbddc->dbg_flag) { 4809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4810 } 4811 4812 /* Setup of Graph */ 4813 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4814 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4815 4816 /* attach info on disconnected subdomains if present */ 4817 if (pcbddc->n_local_subs) { 4818 PetscInt *local_subs; 4819 4820 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4821 for (i=0;i<pcbddc->n_local_subs;i++) { 4822 const PetscInt *idxs; 4823 PetscInt nl,j; 4824 4825 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4826 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4827 for (j=0;j<nl;j++) { 4828 local_subs[idxs[j]] = i; 4829 } 4830 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4831 } 4832 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4833 pcbddc->mat_graph->local_subs = local_subs; 4834 } 4835 4836 /* Graph's connected components analysis */ 4837 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4838 PetscFunctionReturn(0); 4839 } 4840 4841 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4842 #undef __FUNCT__ 4843 #define __FUNCT__ "PCBDDCSubsetNumbering" 4844 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4845 { 4846 PetscSF sf; 4847 PetscLayout map; 4848 const PetscInt *idxs; 4849 PetscInt *leaf_data,*root_data,*gidxs; 4850 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4851 PetscInt n_n,nlocals,start,first_index; 4852 PetscMPIInt commsize; 4853 PetscBool first_found; 4854 PetscErrorCode ierr; 4855 4856 PetscFunctionBegin; 4857 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4858 if (subset_mult) { 4859 PetscCheckSameComm(subset,1,subset_mult,2); 4860 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4861 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4862 } 4863 /* create workspace layout for computing global indices of subset */ 4864 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4865 lbounds[0] = lbounds[1] = 0; 4866 for (i=0;i<n;i++) { 4867 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4868 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4869 } 4870 lbounds[0] = -lbounds[0]; 4871 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4872 gbounds[0] = -gbounds[0]; 4873 N = gbounds[1] - gbounds[0] + 1; 4874 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4875 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4876 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4877 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4878 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4879 4880 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4881 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4882 if (subset_mult) { 4883 const PetscInt* idxs_mult; 4884 4885 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4886 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4887 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4888 } else { 4889 for (i=0;i<n;i++) leaf_data[i] = 1; 4890 } 4891 /* local size of new subset */ 4892 n_n = 0; 4893 for (i=0;i<n;i++) n_n += leaf_data[i]; 4894 4895 /* global indexes in layout */ 4896 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4897 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4898 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4899 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4900 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4901 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4902 4903 /* reduce from leaves to roots */ 4904 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4905 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4906 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4907 4908 /* count indexes in local part of layout */ 4909 nlocals = 0; 4910 first_index = -1; 4911 first_found = PETSC_FALSE; 4912 for (i=0;i<Nl;i++) { 4913 if (!first_found && root_data[i]) { 4914 first_found = PETSC_TRUE; 4915 first_index = i; 4916 } 4917 nlocals += root_data[i]; 4918 } 4919 4920 /* cumulative of number of indexes and size of subset without holes */ 4921 #if defined(PETSC_HAVE_MPI_EXSCAN) 4922 start = 0; 4923 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4924 #else 4925 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4926 start = start-nlocals; 4927 #endif 4928 4929 if (N_n) { /* compute total size of new subset if requested */ 4930 *N_n = start + nlocals; 4931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4932 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4933 } 4934 4935 /* adapt root data with cumulative */ 4936 if (first_found) { 4937 PetscInt old_index; 4938 4939 root_data[first_index] += start; 4940 old_index = first_index; 4941 for (i=first_index+1;i<Nl;i++) { 4942 if (root_data[i]) { 4943 root_data[i] += root_data[old_index]; 4944 old_index = i; 4945 } 4946 } 4947 } 4948 4949 /* from roots to leaves */ 4950 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4951 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4952 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4953 4954 /* create new IS with global indexes without holes */ 4955 if (subset_mult) { 4956 const PetscInt* idxs_mult; 4957 PetscInt cum; 4958 4959 cum = 0; 4960 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4961 for (i=0;i<n;i++) { 4962 PetscInt j; 4963 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4964 } 4965 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4966 } else { 4967 for (i=0;i<n;i++) { 4968 gidxs[i] = leaf_data[i]-1; 4969 } 4970 } 4971 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4972 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 4976 /* this implements stabilized Gram-Schmidt */ 4977 #undef __FUNCT__ 4978 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4979 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4980 { 4981 PetscInt i,j; 4982 PetscScalar *alphas; 4983 PetscErrorCode ierr; 4984 4985 PetscFunctionBegin; 4986 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4987 for (i=0;i<n;i++) { 4988 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4989 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4990 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4991 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4992 } 4993 ierr = PetscFree(alphas);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 #undef __FUNCT__ 4998 #define __FUNCT__ "MatISGetSubassemblingPattern" 4999 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5000 { 5001 Mat A; 5002 PetscInt n_neighs,*neighs,*n_shared,**shared; 5003 PetscMPIInt size,rank,color; 5004 PetscInt *xadj,*adjncy; 5005 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5006 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5007 PetscInt void_procs,*procs_candidates = NULL; 5008 PetscInt xadj_count, *count; 5009 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5010 PetscSubcomm psubcomm; 5011 MPI_Comm subcomm; 5012 PetscErrorCode ierr; 5013 5014 PetscFunctionBegin; 5015 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5016 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5017 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5018 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5019 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5020 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5021 5022 if (have_void) *have_void = PETSC_FALSE; 5023 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5024 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5025 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5026 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5027 im_active = !!(n); 5028 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5029 void_procs = size - active_procs; 5030 /* get ranks of of non-active processes in mat communicator */ 5031 if (void_procs) { 5032 PetscInt ncand; 5033 5034 if (have_void) *have_void = PETSC_TRUE; 5035 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5036 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5037 for (i=0,ncand=0;i<size;i++) { 5038 if (!procs_candidates[i]) { 5039 procs_candidates[ncand++] = i; 5040 } 5041 } 5042 /* force n_subdomains to be not greater that the number of non-active processes */ 5043 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5044 } 5045 5046 /* number of subdomains requested greater than active processes -> just shift the matrix */ 5047 if (active_procs < *n_subdomains) { 5048 PetscInt issize,isidx; 5049 if (im_active) { 5050 issize = 1; 5051 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5052 isidx = procs_candidates[rank]; 5053 } else { 5054 isidx = rank; 5055 } 5056 } else { 5057 issize = 0; 5058 isidx = -1; 5059 } 5060 *n_subdomains = active_procs; 5061 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5062 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5063 PetscFunctionReturn(0); 5064 } 5065 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5066 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5067 threshold = PetscMax(threshold,2); 5068 5069 /* Get info on mapping */ 5070 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5071 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5072 5073 /* build local CSR graph of subdomains' connectivity */ 5074 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5075 xadj[0] = 0; 5076 xadj[1] = PetscMax(n_neighs-1,0); 5077 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5078 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5079 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5080 for (i=1;i<n_neighs;i++) 5081 for (j=0;j<n_shared[i];j++) 5082 count[shared[i][j]] += 1; 5083 5084 xadj_count = 0; 5085 for (i=1;i<n_neighs;i++) { 5086 for (j=0;j<n_shared[i];j++) { 5087 if (count[shared[i][j]] < threshold) { 5088 adjncy[xadj_count] = neighs[i]; 5089 adjncy_wgt[xadj_count] = n_shared[i]; 5090 xadj_count++; 5091 break; 5092 } 5093 } 5094 } 5095 xadj[1] = xadj_count; 5096 ierr = PetscFree(count);CHKERRQ(ierr); 5097 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5098 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5099 5100 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5101 5102 /* Restrict work on active processes only */ 5103 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5104 if (void_procs) { 5105 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5106 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5107 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5108 subcomm = PetscSubcommChild(psubcomm); 5109 } else { 5110 psubcomm = NULL; 5111 subcomm = PetscObjectComm((PetscObject)mat); 5112 } 5113 5114 v_wgt = NULL; 5115 if (!color) { 5116 ierr = PetscFree(xadj);CHKERRQ(ierr); 5117 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5118 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5119 } else { 5120 Mat subdomain_adj; 5121 IS new_ranks,new_ranks_contig; 5122 MatPartitioning partitioner; 5123 PetscInt rstart=0,rend=0; 5124 PetscInt *is_indices,*oldranks; 5125 PetscMPIInt size; 5126 PetscBool aggregate; 5127 5128 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5129 if (void_procs) { 5130 PetscInt prank = rank; 5131 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5132 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5133 for (i=0;i<xadj[1];i++) { 5134 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5135 } 5136 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5137 } else { 5138 oldranks = NULL; 5139 } 5140 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5141 if (aggregate) { /* TODO: all this part could be made more efficient */ 5142 PetscInt lrows,row,ncols,*cols; 5143 PetscMPIInt nrank; 5144 PetscScalar *vals; 5145 5146 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5147 lrows = 0; 5148 if (nrank<redprocs) { 5149 lrows = size/redprocs; 5150 if (nrank<size%redprocs) lrows++; 5151 } 5152 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5153 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5154 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5155 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5156 row = nrank; 5157 ncols = xadj[1]-xadj[0]; 5158 cols = adjncy; 5159 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5160 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5161 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5162 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5163 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5164 ierr = PetscFree(xadj);CHKERRQ(ierr); 5165 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5166 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5167 ierr = PetscFree(vals);CHKERRQ(ierr); 5168 if (use_vwgt) { 5169 Vec v; 5170 const PetscScalar *array; 5171 PetscInt nl; 5172 5173 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5174 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5175 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5176 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5177 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5178 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5179 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5180 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5181 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5182 ierr = VecDestroy(&v);CHKERRQ(ierr); 5183 } 5184 } else { 5185 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5186 if (use_vwgt) { 5187 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5188 v_wgt[0] = local_size; 5189 } 5190 } 5191 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5192 5193 /* Partition */ 5194 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5195 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5196 if (v_wgt) { 5197 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5198 } 5199 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5200 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5201 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5202 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5203 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5204 5205 /* renumber new_ranks to avoid "holes" in new set of processors */ 5206 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5207 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5208 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5209 if (!aggregate) { 5210 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5211 #if defined(PETSC_USE_DEBUG) 5212 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5213 #endif 5214 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5215 } else if (oldranks) { 5216 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5217 } else { 5218 ranks_send_to_idx[0] = is_indices[0]; 5219 } 5220 } else { 5221 PetscInt idxs[1]; 5222 PetscMPIInt tag; 5223 MPI_Request *reqs; 5224 5225 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5226 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5227 for (i=rstart;i<rend;i++) { 5228 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5229 } 5230 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5231 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5232 ierr = PetscFree(reqs);CHKERRQ(ierr); 5233 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5234 #if defined(PETSC_USE_DEBUG) 5235 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5236 #endif 5237 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5238 } else if (oldranks) { 5239 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5240 } else { 5241 ranks_send_to_idx[0] = idxs[0]; 5242 } 5243 } 5244 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5245 /* clean up */ 5246 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5247 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5248 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5249 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5250 } 5251 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5252 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5253 5254 /* assemble parallel IS for sends */ 5255 i = 1; 5256 if (!color) i=0; 5257 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5262 5263 #undef __FUNCT__ 5264 #define __FUNCT__ "MatISSubassemble" 5265 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5266 { 5267 Mat local_mat; 5268 IS is_sends_internal; 5269 PetscInt rows,cols,new_local_rows; 5270 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5271 PetscBool ismatis,isdense,newisdense,destroy_mat; 5272 ISLocalToGlobalMapping l2gmap; 5273 PetscInt* l2gmap_indices; 5274 const PetscInt* is_indices; 5275 MatType new_local_type; 5276 /* buffers */ 5277 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5278 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5279 PetscInt *recv_buffer_idxs_local; 5280 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5281 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5282 /* MPI */ 5283 MPI_Comm comm,comm_n; 5284 PetscSubcomm subcomm; 5285 PetscMPIInt n_sends,n_recvs,commsize; 5286 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5287 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5288 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5289 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5290 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5291 PetscErrorCode ierr; 5292 5293 PetscFunctionBegin; 5294 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5295 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5296 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5297 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5298 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5299 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5300 PetscValidLogicalCollectiveBool(mat,reuse,6); 5301 PetscValidLogicalCollectiveInt(mat,nis,8); 5302 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5303 if (nvecs) { 5304 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5305 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5306 } 5307 /* further checks */ 5308 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5309 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5310 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5311 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5312 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5313 if (reuse && *mat_n) { 5314 PetscInt mrows,mcols,mnrows,mncols; 5315 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5316 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5317 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5318 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5319 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5320 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5321 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5322 } 5323 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5324 PetscValidLogicalCollectiveInt(mat,bs,0); 5325 5326 /* prepare IS for sending if not provided */ 5327 if (!is_sends) { 5328 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5329 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5330 } else { 5331 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5332 is_sends_internal = is_sends; 5333 } 5334 5335 /* get comm */ 5336 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5337 5338 /* compute number of sends */ 5339 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5340 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5341 5342 /* compute number of receives */ 5343 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5344 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5345 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5346 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5347 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5348 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5349 ierr = PetscFree(iflags);CHKERRQ(ierr); 5350 5351 /* restrict comm if requested */ 5352 subcomm = 0; 5353 destroy_mat = PETSC_FALSE; 5354 if (restrict_comm) { 5355 PetscMPIInt color,subcommsize; 5356 5357 color = 0; 5358 if (restrict_full) { 5359 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5360 } else { 5361 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5362 } 5363 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5364 subcommsize = commsize - subcommsize; 5365 /* check if reuse has been requested */ 5366 if (reuse) { 5367 if (*mat_n) { 5368 PetscMPIInt subcommsize2; 5369 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5370 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5371 comm_n = PetscObjectComm((PetscObject)*mat_n); 5372 } else { 5373 comm_n = PETSC_COMM_SELF; 5374 } 5375 } else { /* MAT_INITIAL_MATRIX */ 5376 PetscMPIInt rank; 5377 5378 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5379 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5380 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5381 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5382 comm_n = PetscSubcommChild(subcomm); 5383 } 5384 /* flag to destroy *mat_n if not significative */ 5385 if (color) destroy_mat = PETSC_TRUE; 5386 } else { 5387 comm_n = comm; 5388 } 5389 5390 /* prepare send/receive buffers */ 5391 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5392 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5393 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5394 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5395 if (nis) { 5396 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5397 } 5398 5399 /* Get data from local matrices */ 5400 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5401 /* TODO: See below some guidelines on how to prepare the local buffers */ 5402 /* 5403 send_buffer_vals should contain the raw values of the local matrix 5404 send_buffer_idxs should contain: 5405 - MatType_PRIVATE type 5406 - PetscInt size_of_l2gmap 5407 - PetscInt global_row_indices[size_of_l2gmap] 5408 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5409 */ 5410 else { 5411 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5412 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5413 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5414 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5415 send_buffer_idxs[1] = i; 5416 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5417 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5418 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5419 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5420 for (i=0;i<n_sends;i++) { 5421 ilengths_vals[is_indices[i]] = len*len; 5422 ilengths_idxs[is_indices[i]] = len+2; 5423 } 5424 } 5425 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5426 /* additional is (if any) */ 5427 if (nis) { 5428 PetscMPIInt psum; 5429 PetscInt j; 5430 for (j=0,psum=0;j<nis;j++) { 5431 PetscInt plen; 5432 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5433 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5434 psum += len+1; /* indices + lenght */ 5435 } 5436 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5437 for (j=0,psum=0;j<nis;j++) { 5438 PetscInt plen; 5439 const PetscInt *is_array_idxs; 5440 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5441 send_buffer_idxs_is[psum] = plen; 5442 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5443 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5444 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5445 psum += plen+1; /* indices + lenght */ 5446 } 5447 for (i=0;i<n_sends;i++) { 5448 ilengths_idxs_is[is_indices[i]] = psum; 5449 } 5450 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5451 } 5452 5453 buf_size_idxs = 0; 5454 buf_size_vals = 0; 5455 buf_size_idxs_is = 0; 5456 buf_size_vecs = 0; 5457 for (i=0;i<n_recvs;i++) { 5458 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5459 buf_size_vals += (PetscInt)olengths_vals[i]; 5460 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5461 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5462 } 5463 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5464 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5465 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5466 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5467 5468 /* get new tags for clean communications */ 5469 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5470 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5471 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5472 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5473 5474 /* allocate for requests */ 5475 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5476 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5477 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5478 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5479 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5480 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5481 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5482 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5483 5484 /* communications */ 5485 ptr_idxs = recv_buffer_idxs; 5486 ptr_vals = recv_buffer_vals; 5487 ptr_idxs_is = recv_buffer_idxs_is; 5488 ptr_vecs = recv_buffer_vecs; 5489 for (i=0;i<n_recvs;i++) { 5490 source_dest = onodes[i]; 5491 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5492 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5493 ptr_idxs += olengths_idxs[i]; 5494 ptr_vals += olengths_vals[i]; 5495 if (nis) { 5496 source_dest = onodes_is[i]; 5497 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5498 ptr_idxs_is += olengths_idxs_is[i]; 5499 } 5500 if (nvecs) { 5501 source_dest = onodes[i]; 5502 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5503 ptr_vecs += olengths_idxs[i]-2; 5504 } 5505 } 5506 for (i=0;i<n_sends;i++) { 5507 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5508 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5509 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5510 if (nis) { 5511 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5512 } 5513 if (nvecs) { 5514 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5515 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5516 } 5517 } 5518 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5519 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5520 5521 /* assemble new l2g map */ 5522 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5523 ptr_idxs = recv_buffer_idxs; 5524 new_local_rows = 0; 5525 for (i=0;i<n_recvs;i++) { 5526 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5527 ptr_idxs += olengths_idxs[i]; 5528 } 5529 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5530 ptr_idxs = recv_buffer_idxs; 5531 new_local_rows = 0; 5532 for (i=0;i<n_recvs;i++) { 5533 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5534 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5535 ptr_idxs += olengths_idxs[i]; 5536 } 5537 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5538 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5539 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5540 5541 /* infer new local matrix type from received local matrices type */ 5542 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5543 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5544 if (n_recvs) { 5545 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5546 ptr_idxs = recv_buffer_idxs; 5547 for (i=0;i<n_recvs;i++) { 5548 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5549 new_local_type_private = MATAIJ_PRIVATE; 5550 break; 5551 } 5552 ptr_idxs += olengths_idxs[i]; 5553 } 5554 switch (new_local_type_private) { 5555 case MATDENSE_PRIVATE: 5556 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5557 new_local_type = MATSEQAIJ; 5558 bs = 1; 5559 } else { /* if I receive only 1 dense matrix */ 5560 new_local_type = MATSEQDENSE; 5561 bs = 1; 5562 } 5563 break; 5564 case MATAIJ_PRIVATE: 5565 new_local_type = MATSEQAIJ; 5566 bs = 1; 5567 break; 5568 case MATBAIJ_PRIVATE: 5569 new_local_type = MATSEQBAIJ; 5570 break; 5571 case MATSBAIJ_PRIVATE: 5572 new_local_type = MATSEQSBAIJ; 5573 break; 5574 default: 5575 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5576 break; 5577 } 5578 } else { /* by default, new_local_type is seqdense */ 5579 new_local_type = MATSEQDENSE; 5580 bs = 1; 5581 } 5582 5583 /* create MATIS object if needed */ 5584 if (!reuse) { 5585 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5586 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5587 } else { 5588 /* it also destroys the local matrices */ 5589 if (*mat_n) { 5590 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5591 } else { /* this is a fake object */ 5592 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5593 } 5594 } 5595 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5596 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5597 5598 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5599 5600 /* Global to local map of received indices */ 5601 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5602 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5603 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5604 5605 /* restore attributes -> type of incoming data and its size */ 5606 buf_size_idxs = 0; 5607 for (i=0;i<n_recvs;i++) { 5608 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5609 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5610 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5611 } 5612 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5613 5614 /* set preallocation */ 5615 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5616 if (!newisdense) { 5617 PetscInt *new_local_nnz=0; 5618 5619 ptr_idxs = recv_buffer_idxs_local; 5620 if (n_recvs) { 5621 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5622 } 5623 for (i=0;i<n_recvs;i++) { 5624 PetscInt j; 5625 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5626 for (j=0;j<*(ptr_idxs+1);j++) { 5627 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5628 } 5629 } else { 5630 /* TODO */ 5631 } 5632 ptr_idxs += olengths_idxs[i]; 5633 } 5634 if (new_local_nnz) { 5635 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5636 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5637 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5638 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5639 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5640 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5641 } else { 5642 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5643 } 5644 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5645 } else { 5646 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5647 } 5648 5649 /* set values */ 5650 ptr_vals = recv_buffer_vals; 5651 ptr_idxs = recv_buffer_idxs_local; 5652 for (i=0;i<n_recvs;i++) { 5653 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5654 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5655 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5656 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5657 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5658 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5659 } else { 5660 /* TODO */ 5661 } 5662 ptr_idxs += olengths_idxs[i]; 5663 ptr_vals += olengths_vals[i]; 5664 } 5665 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5666 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5667 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5668 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5669 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5670 5671 #if 0 5672 if (!restrict_comm) { /* check */ 5673 Vec lvec,rvec; 5674 PetscReal infty_error; 5675 5676 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5677 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5678 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5679 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5680 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5681 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5682 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5683 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5684 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5685 } 5686 #endif 5687 5688 /* assemble new additional is (if any) */ 5689 if (nis) { 5690 PetscInt **temp_idxs,*count_is,j,psum; 5691 5692 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5693 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5694 ptr_idxs = recv_buffer_idxs_is; 5695 psum = 0; 5696 for (i=0;i<n_recvs;i++) { 5697 for (j=0;j<nis;j++) { 5698 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5699 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5700 psum += plen; 5701 ptr_idxs += plen+1; /* shift pointer to received data */ 5702 } 5703 } 5704 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5705 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5706 for (i=1;i<nis;i++) { 5707 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5708 } 5709 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5710 ptr_idxs = recv_buffer_idxs_is; 5711 for (i=0;i<n_recvs;i++) { 5712 for (j=0;j<nis;j++) { 5713 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5714 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5715 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5716 ptr_idxs += plen+1; /* shift pointer to received data */ 5717 } 5718 } 5719 for (i=0;i<nis;i++) { 5720 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5721 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5722 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5723 } 5724 ierr = PetscFree(count_is);CHKERRQ(ierr); 5725 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5726 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5727 } 5728 /* free workspace */ 5729 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5730 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5731 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5732 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5733 if (isdense) { 5734 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5735 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5736 } else { 5737 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5738 } 5739 if (nis) { 5740 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5741 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5742 } 5743 5744 if (nvecs) { 5745 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5746 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5747 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5748 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5749 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5750 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5751 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5752 /* set values */ 5753 ptr_vals = recv_buffer_vecs; 5754 ptr_idxs = recv_buffer_idxs_local; 5755 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5756 for (i=0;i<n_recvs;i++) { 5757 PetscInt j; 5758 for (j=0;j<*(ptr_idxs+1);j++) { 5759 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5760 } 5761 ptr_idxs += olengths_idxs[i]; 5762 ptr_vals += olengths_idxs[i]-2; 5763 } 5764 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5765 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5766 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5767 } 5768 5769 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5770 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5771 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5772 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5773 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5774 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5775 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5776 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5777 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5778 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5779 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5780 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5781 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5782 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5783 ierr = PetscFree(onodes);CHKERRQ(ierr); 5784 if (nis) { 5785 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5786 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5787 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5788 } 5789 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5790 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5791 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5792 for (i=0;i<nis;i++) { 5793 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5794 } 5795 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5796 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5797 } 5798 *mat_n = NULL; 5799 } 5800 PetscFunctionReturn(0); 5801 } 5802 5803 /* temporary hack into ksp private data structure */ 5804 #include <petsc/private/kspimpl.h> 5805 5806 #undef __FUNCT__ 5807 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5808 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5809 { 5810 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5811 PC_IS *pcis = (PC_IS*)pc->data; 5812 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5813 Mat coarsedivudotp = NULL; 5814 MatNullSpace CoarseNullSpace = NULL; 5815 ISLocalToGlobalMapping coarse_islg; 5816 IS coarse_is,*isarray; 5817 PetscInt i,im_active=-1,active_procs=-1; 5818 PetscInt nis,nisdofs,nisneu,nisvert; 5819 PC pc_temp; 5820 PCType coarse_pc_type; 5821 KSPType coarse_ksp_type; 5822 PetscBool multilevel_requested,multilevel_allowed; 5823 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5824 Mat t_coarse_mat_is; 5825 PetscInt ncoarse; 5826 PetscBool compute_vecs = PETSC_FALSE; 5827 PetscScalar *array; 5828 MatReuse coarse_mat_reuse; 5829 PetscBool restr, full_restr, have_void; 5830 PetscErrorCode ierr; 5831 5832 PetscFunctionBegin; 5833 /* Assign global numbering to coarse dofs */ 5834 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5835 PetscInt ocoarse_size; 5836 compute_vecs = PETSC_TRUE; 5837 ocoarse_size = pcbddc->coarse_size; 5838 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5839 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5840 /* see if we can avoid some work */ 5841 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5842 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5843 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5844 PC pc; 5845 PetscBool isbddc; 5846 5847 /* temporary workaround since PCBDDC does not have a reset method so far */ 5848 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5849 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5850 if (isbddc) { 5851 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5852 } else { 5853 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5854 } 5855 coarse_reuse = PETSC_FALSE; 5856 } else { /* we can safely reuse already computed coarse matrix */ 5857 coarse_reuse = PETSC_TRUE; 5858 } 5859 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5860 coarse_reuse = PETSC_FALSE; 5861 } 5862 /* reset any subassembling information */ 5863 if (!coarse_reuse || pcbddc->recompute_topography) { 5864 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5865 } 5866 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5867 coarse_reuse = PETSC_TRUE; 5868 } 5869 /* assemble coarse matrix */ 5870 if (coarse_reuse && pcbddc->coarse_ksp) { 5871 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5872 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5873 coarse_mat_reuse = MAT_REUSE_MATRIX; 5874 } else { 5875 coarse_mat = NULL; 5876 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5877 } 5878 5879 /* creates temporary l2gmap and IS for coarse indexes */ 5880 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5881 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5882 5883 /* creates temporary MATIS object for coarse matrix */ 5884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5885 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5886 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5887 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5888 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5889 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5890 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5891 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5892 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5893 5894 /* count "active" (i.e. with positive local size) and "void" processes */ 5895 im_active = !!(pcis->n); 5896 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5897 5898 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5899 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5900 /* full_restr : just use the receivers from the subassembling pattern */ 5901 coarse_mat_is = NULL; 5902 multilevel_allowed = PETSC_FALSE; 5903 multilevel_requested = PETSC_FALSE; 5904 full_restr = PETSC_TRUE; 5905 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5906 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5907 if (multilevel_requested) { 5908 ncoarse = active_procs/pcbddc->coarsening_ratio; 5909 restr = PETSC_FALSE; 5910 full_restr = PETSC_FALSE; 5911 } else { 5912 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5913 restr = PETSC_TRUE; 5914 full_restr = PETSC_TRUE; 5915 } 5916 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5917 ncoarse = PetscMax(1,ncoarse); 5918 if (!pcbddc->coarse_subassembling) { 5919 if (pcbddc->coarsening_ratio > 1) { 5920 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5921 } else { 5922 PetscMPIInt size,rank; 5923 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5924 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5925 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5926 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5927 } 5928 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5929 PetscInt psum; 5930 PetscMPIInt size; 5931 if (pcbddc->coarse_ksp) psum = 1; 5932 else psum = 0; 5933 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5934 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5935 if (ncoarse < size) have_void = PETSC_TRUE; 5936 } 5937 /* determine if we can go multilevel */ 5938 if (multilevel_requested) { 5939 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5940 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5941 } 5942 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5943 5944 /* dump subassembling pattern */ 5945 if (pcbddc->dbg_flag && multilevel_allowed) { 5946 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5947 } 5948 5949 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5950 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5951 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5952 const PetscInt *idxs; 5953 ISLocalToGlobalMapping tmap; 5954 5955 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5956 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5957 /* allocate space for temporary storage */ 5958 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5959 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5960 /* allocate for IS array */ 5961 nisdofs = pcbddc->n_ISForDofsLocal; 5962 nisneu = !!pcbddc->NeumannBoundariesLocal; 5963 nisvert = 0; /* nisvert is not used */ 5964 nis = nisdofs + nisneu + nisvert; 5965 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5966 /* dofs splitting */ 5967 for (i=0;i<nisdofs;i++) { 5968 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5969 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5970 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5971 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5972 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5973 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5974 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5975 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5976 } 5977 /* neumann boundaries */ 5978 if (pcbddc->NeumannBoundariesLocal) { 5979 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5980 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5981 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5982 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5983 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5984 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5986 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5987 } 5988 /* free memory */ 5989 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5990 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5991 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5992 } else { 5993 nis = 0; 5994 nisdofs = 0; 5995 nisneu = 0; 5996 nisvert = 0; 5997 isarray = NULL; 5998 } 5999 /* destroy no longer needed map */ 6000 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6001 6002 /* subassemble */ 6003 if (multilevel_allowed) { 6004 Vec vp[1]; 6005 PetscInt nvecs = 0; 6006 PetscBool reuse,reuser; 6007 6008 if (coarse_mat) reuse = PETSC_TRUE; 6009 else reuse = PETSC_FALSE; 6010 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6011 vp[0] = NULL; 6012 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6013 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6014 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6015 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6016 nvecs = 1; 6017 6018 if (pcbddc->divudotp) { 6019 Mat B,loc_divudotp; 6020 Vec v,p; 6021 IS dummy; 6022 PetscInt np; 6023 6024 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6025 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6026 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6027 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6028 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6029 ierr = VecSet(p,1.);CHKERRQ(ierr); 6030 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6031 ierr = VecDestroy(&p);CHKERRQ(ierr); 6032 ierr = MatDestroy(&B);CHKERRQ(ierr); 6033 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6034 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6035 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6036 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6037 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6038 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6039 ierr = VecDestroy(&v);CHKERRQ(ierr); 6040 } 6041 } 6042 if (reuser) { 6043 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6044 } else { 6045 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6046 } 6047 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6048 PetscScalar *arraym,*arrayv; 6049 PetscInt nl; 6050 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6052 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6053 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6054 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6055 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6056 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6057 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6058 } else { 6059 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6060 } 6061 } else { 6062 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6063 } 6064 if (coarse_mat_is || coarse_mat) { 6065 PetscMPIInt size; 6066 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6067 if (!multilevel_allowed) { 6068 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6069 } else { 6070 Mat A; 6071 6072 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6073 if (coarse_mat_is) { 6074 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6075 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6076 coarse_mat = coarse_mat_is; 6077 } 6078 /* be sure we don't have MatSeqDENSE as local mat */ 6079 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6080 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6081 } 6082 } 6083 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6084 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6085 6086 /* create local to global scatters for coarse problem */ 6087 if (compute_vecs) { 6088 PetscInt lrows; 6089 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6090 if (coarse_mat) { 6091 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6092 } else { 6093 lrows = 0; 6094 } 6095 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6096 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6097 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6098 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6099 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6100 } 6101 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6102 6103 /* set defaults for coarse KSP and PC */ 6104 if (multilevel_allowed) { 6105 coarse_ksp_type = KSPRICHARDSON; 6106 coarse_pc_type = PCBDDC; 6107 } else { 6108 coarse_ksp_type = KSPPREONLY; 6109 coarse_pc_type = PCREDUNDANT; 6110 } 6111 6112 /* print some info if requested */ 6113 if (pcbddc->dbg_flag) { 6114 if (!multilevel_allowed) { 6115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6116 if (multilevel_requested) { 6117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6118 } else if (pcbddc->max_levels) { 6119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6120 } 6121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6122 } 6123 } 6124 6125 /* create the coarse KSP object only once with defaults */ 6126 if (coarse_mat) { 6127 PetscViewer dbg_viewer = NULL; 6128 if (pcbddc->dbg_flag) { 6129 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6130 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6131 } 6132 if (!pcbddc->coarse_ksp) { 6133 char prefix[256],str_level[16]; 6134 size_t len; 6135 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6136 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6137 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6138 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6139 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6140 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6141 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6142 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6143 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6144 /* prefix */ 6145 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6146 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6147 if (!pcbddc->current_level) { 6148 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6149 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6150 } else { 6151 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6152 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6153 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6154 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6155 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6156 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6157 } 6158 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6159 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6160 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6161 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6162 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6163 /* allow user customization */ 6164 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6165 } 6166 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6167 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6168 if (nisdofs) { 6169 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6170 for (i=0;i<nisdofs;i++) { 6171 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6172 } 6173 } 6174 if (nisneu) { 6175 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6176 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6177 } 6178 if (nisvert) { 6179 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6180 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6181 } 6182 6183 /* get some info after set from options */ 6184 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6185 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6186 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6187 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6188 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6189 isbddc = PETSC_FALSE; 6190 } 6191 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6192 if (isredundant) { 6193 KSP inner_ksp; 6194 PC inner_pc; 6195 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6196 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6197 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6198 } 6199 6200 /* parameters which miss an API */ 6201 if (isbddc) { 6202 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6203 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6204 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6205 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6206 if (pcbddc_coarse->benign_saddle_point) { 6207 Mat coarsedivudotp_is; 6208 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6209 IS row,col; 6210 const PetscInt *gidxs; 6211 PetscInt n,st,M,N; 6212 6213 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6214 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6215 st = st-n; 6216 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6217 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6218 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6219 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6220 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6221 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6222 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6223 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6224 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6225 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6226 ierr = ISDestroy(&row);CHKERRQ(ierr); 6227 ierr = ISDestroy(&col);CHKERRQ(ierr); 6228 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6229 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6230 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6231 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6232 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6233 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6234 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6235 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6236 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,NULL);CHKERRQ(ierr); 6237 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6238 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6239 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6240 } 6241 } 6242 6243 /* propagate symmetry info of coarse matrix */ 6244 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6245 if (pc->pmat->symmetric_set) { 6246 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6247 } 6248 if (pc->pmat->hermitian_set) { 6249 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6250 } 6251 if (pc->pmat->spd_set) { 6252 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6253 } 6254 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6255 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6256 } 6257 /* set operators */ 6258 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6259 if (pcbddc->dbg_flag) { 6260 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6261 } 6262 } 6263 ierr = PetscFree(isarray);CHKERRQ(ierr); 6264 #if 0 6265 { 6266 PetscViewer viewer; 6267 char filename[256]; 6268 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6269 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6270 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6271 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6272 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6273 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6274 } 6275 #endif 6276 6277 if (pcbddc->coarse_ksp) { 6278 Vec crhs,csol; 6279 6280 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6281 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6282 if (!csol) { 6283 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6284 } 6285 if (!crhs) { 6286 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6287 } 6288 } 6289 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6290 6291 /* compute null space for coarse solver if the benign trick has been requested */ 6292 if (pcbddc->benign_null) { 6293 6294 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6295 for (i=0;i<pcbddc->benign_n;i++) { 6296 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6297 } 6298 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6299 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6300 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6301 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6302 if (coarse_mat) { 6303 Vec nullv; 6304 PetscScalar *array,*array2; 6305 PetscInt nl; 6306 6307 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6308 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6309 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6310 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6311 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6312 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6313 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6314 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6315 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6316 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6317 } 6318 } 6319 6320 if (pcbddc->coarse_ksp) { 6321 PetscBool ispreonly; 6322 6323 if (CoarseNullSpace) { 6324 PetscBool isnull; 6325 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6326 if (isnull) { 6327 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6328 } 6329 /* TODO: add local nullspaces (if any) */ 6330 } 6331 /* setup coarse ksp */ 6332 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6333 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6334 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6335 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6336 KSP check_ksp; 6337 KSPType check_ksp_type; 6338 PC check_pc; 6339 Vec check_vec,coarse_vec; 6340 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6341 PetscInt its; 6342 PetscBool compute_eigs; 6343 PetscReal *eigs_r,*eigs_c; 6344 PetscInt neigs; 6345 const char *prefix; 6346 6347 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6348 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6349 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6350 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6351 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6352 /* prevent from setup unneeded object */ 6353 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6354 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6355 if (ispreonly) { 6356 check_ksp_type = KSPPREONLY; 6357 compute_eigs = PETSC_FALSE; 6358 } else { 6359 check_ksp_type = KSPGMRES; 6360 compute_eigs = PETSC_TRUE; 6361 } 6362 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6363 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6364 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6365 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6366 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6367 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6368 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6369 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6370 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6371 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6372 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6373 /* create random vec */ 6374 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6375 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6376 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6377 /* solve coarse problem */ 6378 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6379 /* set eigenvalue estimation if preonly has not been requested */ 6380 if (compute_eigs) { 6381 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6382 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6383 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6384 if (neigs) { 6385 lambda_max = eigs_r[neigs-1]; 6386 lambda_min = eigs_r[0]; 6387 if (pcbddc->use_coarse_estimates) { 6388 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6389 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6390 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6391 } 6392 } 6393 } 6394 } 6395 6396 /* check coarse problem residual error */ 6397 if (pcbddc->dbg_flag) { 6398 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6399 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6400 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6401 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6402 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6403 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6404 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6405 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6406 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6407 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6408 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6409 if (CoarseNullSpace) { 6410 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6411 } 6412 if (compute_eigs) { 6413 PetscReal lambda_max_s,lambda_min_s; 6414 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6415 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6416 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6417 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6418 for (i=0;i<neigs;i++) { 6419 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6420 } 6421 } 6422 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6423 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6424 } 6425 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6426 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6427 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6428 if (compute_eigs) { 6429 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6430 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6431 } 6432 } 6433 } 6434 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6435 /* print additional info */ 6436 if (pcbddc->dbg_flag) { 6437 /* waits until all processes reaches this point */ 6438 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6441 } 6442 6443 /* free memory */ 6444 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6445 PetscFunctionReturn(0); 6446 } 6447 6448 #undef __FUNCT__ 6449 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6450 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6451 { 6452 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6453 PC_IS* pcis = (PC_IS*)pc->data; 6454 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6455 IS subset,subset_mult,subset_n; 6456 PetscInt local_size,coarse_size=0; 6457 PetscInt *local_primal_indices=NULL; 6458 const PetscInt *t_local_primal_indices; 6459 PetscErrorCode ierr; 6460 6461 PetscFunctionBegin; 6462 /* Compute global number of coarse dofs */ 6463 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6464 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6465 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6466 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6467 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6468 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6469 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6470 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6471 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6472 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6473 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6474 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6475 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6476 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6477 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6478 6479 /* check numbering */ 6480 if (pcbddc->dbg_flag) { 6481 PetscScalar coarsesum,*array,*array2; 6482 PetscInt i; 6483 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6484 6485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6488 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6489 /* counter */ 6490 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6491 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6492 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6493 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6494 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6495 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6496 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6497 for (i=0;i<pcbddc->local_primal_size;i++) { 6498 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6499 } 6500 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6501 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6502 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6503 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6504 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6505 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6506 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6507 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6508 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6509 for (i=0;i<pcis->n;i++) { 6510 if (array[i] != 0.0 && array[i] != array2[i]) { 6511 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6512 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6513 set_error = PETSC_TRUE; 6514 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6516 } 6517 } 6518 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6519 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6521 for (i=0;i<pcis->n;i++) { 6522 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6523 } 6524 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6525 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6526 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6527 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6528 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6530 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6531 PetscInt *gidxs; 6532 6533 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6534 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6538 for (i=0;i<pcbddc->local_primal_size;i++) { 6539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6540 } 6541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6542 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6543 } 6544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6545 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6546 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6547 } 6548 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6549 /* get back data */ 6550 *coarse_size_n = coarse_size; 6551 *local_primal_indices_n = local_primal_indices; 6552 PetscFunctionReturn(0); 6553 } 6554 6555 #undef __FUNCT__ 6556 #define __FUNCT__ "PCBDDCGlobalToLocal" 6557 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6558 { 6559 IS localis_t; 6560 PetscInt i,lsize,*idxs,n; 6561 PetscScalar *vals; 6562 PetscErrorCode ierr; 6563 6564 PetscFunctionBegin; 6565 /* get indices in local ordering exploiting local to global map */ 6566 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6567 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6568 for (i=0;i<lsize;i++) vals[i] = 1.0; 6569 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6570 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6571 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6572 if (idxs) { /* multilevel guard */ 6573 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6574 } 6575 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6576 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6577 ierr = PetscFree(vals);CHKERRQ(ierr); 6578 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6579 /* now compute set in local ordering */ 6580 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6581 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6582 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6583 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6584 for (i=0,lsize=0;i<n;i++) { 6585 if (PetscRealPart(vals[i]) > 0.5) { 6586 lsize++; 6587 } 6588 } 6589 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6590 for (i=0,lsize=0;i<n;i++) { 6591 if (PetscRealPart(vals[i]) > 0.5) { 6592 idxs[lsize++] = i; 6593 } 6594 } 6595 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6597 *localis = localis_t; 6598 PetscFunctionReturn(0); 6599 } 6600 6601 #undef __FUNCT__ 6602 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6603 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6604 { 6605 PC_IS *pcis=(PC_IS*)pc->data; 6606 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6607 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6608 Mat S_j; 6609 PetscInt *used_xadj,*used_adjncy; 6610 PetscBool free_used_adj; 6611 PetscErrorCode ierr; 6612 6613 PetscFunctionBegin; 6614 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6615 free_used_adj = PETSC_FALSE; 6616 if (pcbddc->sub_schurs_layers == -1) { 6617 used_xadj = NULL; 6618 used_adjncy = NULL; 6619 } else { 6620 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6621 used_xadj = pcbddc->mat_graph->xadj; 6622 used_adjncy = pcbddc->mat_graph->adjncy; 6623 } else if (pcbddc->computed_rowadj) { 6624 used_xadj = pcbddc->mat_graph->xadj; 6625 used_adjncy = pcbddc->mat_graph->adjncy; 6626 } else { 6627 PetscBool flg_row=PETSC_FALSE; 6628 const PetscInt *xadj,*adjncy; 6629 PetscInt nvtxs; 6630 6631 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6632 if (flg_row) { 6633 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6634 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6635 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6636 free_used_adj = PETSC_TRUE; 6637 } else { 6638 pcbddc->sub_schurs_layers = -1; 6639 used_xadj = NULL; 6640 used_adjncy = NULL; 6641 } 6642 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6643 } 6644 } 6645 6646 /* setup sub_schurs data */ 6647 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6648 if (!sub_schurs->schur_explicit) { 6649 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6650 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6651 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6652 } else { 6653 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6654 PetscBool isseqaij,need_change = PETSC_FALSE;; 6655 PetscInt benign_n; 6656 Mat change = NULL; 6657 Vec scaling = NULL; 6658 IS change_primal = NULL; 6659 6660 if (!pcbddc->use_vertices && reuse_solvers) { 6661 PetscInt n_vertices; 6662 6663 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6664 reuse_solvers = (PetscBool)!n_vertices; 6665 } 6666 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6667 if (!isseqaij) { 6668 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6669 if (matis->A == pcbddc->local_mat) { 6670 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6671 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6672 } else { 6673 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6674 } 6675 } 6676 if (!pcbddc->benign_change_explicit) { 6677 benign_n = pcbddc->benign_n; 6678 } else { 6679 benign_n = 0; 6680 } 6681 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6682 We need a global reduction to avoid possible deadlocks. 6683 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6684 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6685 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6686 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6687 need_change = (PetscBool)(!need_change); 6688 } 6689 /* If the user defines additional constraints, we import them here. 6690 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6691 if (need_change) { 6692 PC_IS *pcisf; 6693 PC_BDDC *pcbddcf; 6694 PC pcf; 6695 6696 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6697 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6698 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6699 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6700 /* hacks */ 6701 pcisf = (PC_IS*)pcf->data; 6702 pcisf->is_B_local = pcis->is_B_local; 6703 pcisf->vec1_N = pcis->vec1_N; 6704 pcisf->BtoNmap = pcis->BtoNmap; 6705 pcisf->n = pcis->n; 6706 pcisf->n_B = pcis->n_B; 6707 pcbddcf = (PC_BDDC*)pcf->data; 6708 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6709 pcbddcf->mat_graph = pcbddc->mat_graph; 6710 pcbddcf->use_faces = PETSC_TRUE; 6711 pcbddcf->use_change_of_basis = PETSC_TRUE; 6712 pcbddcf->use_change_on_faces = PETSC_TRUE; 6713 pcbddcf->use_qr_single = PETSC_TRUE; 6714 pcbddcf->fake_change = PETSC_TRUE; 6715 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6716 /* store information on primal vertices and change of basis (in local numbering) */ 6717 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6718 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6719 change = pcbddcf->ConstraintMatrix; 6720 pcbddcf->ConstraintMatrix = NULL; 6721 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6722 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6723 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6724 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6725 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6726 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6727 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6728 pcf->ops->destroy = NULL; 6729 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6730 } 6731 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6732 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6733 ierr = MatDestroy(&change);CHKERRQ(ierr); 6734 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6735 } 6736 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6737 6738 /* free adjacency */ 6739 if (free_used_adj) { 6740 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6741 } 6742 PetscFunctionReturn(0); 6743 } 6744 6745 #undef __FUNCT__ 6746 #define __FUNCT__ "PCBDDCInitSubSchurs" 6747 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6748 { 6749 PC_IS *pcis=(PC_IS*)pc->data; 6750 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6751 PCBDDCGraph graph; 6752 PetscErrorCode ierr; 6753 6754 PetscFunctionBegin; 6755 /* attach interface graph for determining subsets */ 6756 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6757 IS verticesIS,verticescomm; 6758 PetscInt vsize,*idxs; 6759 6760 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6761 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6762 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6763 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6764 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6765 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6766 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6767 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6768 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6769 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6770 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6771 } else { 6772 graph = pcbddc->mat_graph; 6773 } 6774 /* print some info */ 6775 if (pcbddc->dbg_flag) { 6776 IS vertices; 6777 PetscInt nv,nedges,nfaces; 6778 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6779 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6780 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6781 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6782 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6788 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6789 } 6790 6791 /* sub_schurs init */ 6792 if (!pcbddc->sub_schurs) { 6793 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6794 } 6795 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6796 6797 /* free graph struct */ 6798 if (pcbddc->sub_schurs_rebuild) { 6799 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6800 } 6801 PetscFunctionReturn(0); 6802 } 6803 6804 #undef __FUNCT__ 6805 #define __FUNCT__ "PCBDDCCheckOperator" 6806 PetscErrorCode PCBDDCCheckOperator(PC pc) 6807 { 6808 PC_IS *pcis=(PC_IS*)pc->data; 6809 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6810 PetscErrorCode ierr; 6811 6812 PetscFunctionBegin; 6813 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6814 IS zerodiag = NULL; 6815 Mat S_j,B0_B=NULL; 6816 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6817 PetscScalar *p0_check,*array,*array2; 6818 PetscReal norm; 6819 PetscInt i; 6820 6821 /* B0 and B0_B */ 6822 if (zerodiag) { 6823 IS dummy; 6824 6825 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6826 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6827 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6828 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6829 } 6830 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6831 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6832 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6833 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6834 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6835 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6836 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6837 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6838 /* S_j */ 6839 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6840 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6841 6842 /* mimic vector in \widetilde{W}_\Gamma */ 6843 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6844 /* continuous in primal space */ 6845 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6846 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6847 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6848 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6849 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6850 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6851 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6852 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6853 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6854 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6855 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6856 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6857 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6858 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6859 6860 /* assemble rhs for coarse problem */ 6861 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6862 /* local with Schur */ 6863 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6864 if (zerodiag) { 6865 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6866 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6867 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6868 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6869 } 6870 /* sum on primal nodes the local contributions */ 6871 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6872 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6873 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6874 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6875 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6876 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6877 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6878 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6879 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6880 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6881 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6882 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6883 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6884 /* scale primal nodes (BDDC sums contibutions) */ 6885 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6886 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6887 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6888 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6889 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6890 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6891 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6892 /* global: \widetilde{B0}_B w_\Gamma */ 6893 if (zerodiag) { 6894 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6895 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6896 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6897 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6898 } 6899 /* BDDC */ 6900 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6901 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6902 6903 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6904 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6905 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6906 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6907 for (i=0;i<pcbddc->benign_n;i++) { 6908 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6909 } 6910 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6911 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6912 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6913 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6914 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6915 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6916 } 6917 PetscFunctionReturn(0); 6918 } 6919