1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 PetscErrorCode ierr; 13 ISLocalToGlobalMapping rmap; 14 IS *faces,*edges; 15 Vec p,v,quad_vec,*quad_vecs; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 20 PetscFunctionBegin; 21 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 22 if (graph->twodim) { 23 lmaxneighs = 2; 24 } else { 25 lmaxneighs = 1; 26 for (i=0;i<ne;i++) { 27 const PetscInt *idxs; 28 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 29 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 30 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 31 } 32 lmaxneighs++; /* graph count does not include self */ 33 } 34 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 35 maxsize = 0; 36 for (i=0;i<ne;i++) { 37 PetscInt nn; 38 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 39 maxsize = PetscMax(maxsize,nn); 40 } 41 for (i=0;i<nf;i++) { 42 PetscInt nn; 43 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 44 maxsize = PetscMax(maxsize,nn); 45 } 46 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 47 /* create vectors to hold quadrature weights */ 48 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 49 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 50 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 51 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 52 for (i=0;i<maxneighs;i++) { 53 PetscInt first,last; 54 55 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 56 /* the near-null space fo BDDC carries information on quadrature weights, 57 and these can be collinear -> so cheat with MatNullSpaceCreate 58 and create a suitable set of basis vectors first */ 59 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 60 if (i>=first && i < last) { 61 PetscScalar *data; 62 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 63 data[i-first] = 1.; 64 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 65 } 66 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 67 } 68 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 69 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 70 /* compute local quad vec */ 71 ierr = MatCreateVecs(divudotp,&v,&p);CHKERRQ(ierr); 72 ierr = VecSet(p,1.);CHKERRQ(ierr); 73 ierr = MatMultTranspose(divudotp,p,v);CHKERRQ(ierr); 74 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 75 ierr = VecDestroy(&p);CHKERRQ(ierr); 76 for (i=0;i<nf;i++) { 77 const PetscInt *idxs; 78 PetscInt idx,nn,j; 79 80 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 81 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 82 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 83 ierr = PetscFindInt(PetscGlobalRank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 84 idx = -(idx+1); 85 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 86 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 87 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 88 } 89 for (i=0;i<ne;i++) { 90 const PetscInt *idxs; 91 PetscInt idx,nn,j; 92 93 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 95 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 96 ierr = PetscFindInt(PetscGlobalRank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 97 idx = -(idx+1); 98 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 99 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 100 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 101 } 102 ierr = PetscFree(edges);CHKERRQ(ierr); 103 ierr = PetscFree(faces);CHKERRQ(ierr); 104 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 105 ierr = VecDestroy(&v);CHKERRQ(ierr); 106 ierr = PetscFree(vals);CHKERRQ(ierr); 107 108 /* assemble near null space */ 109 for (i=0;i<maxneighs;i++) { 110 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 111 } 112 for (i=0;i<maxneighs;i++) { 113 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 114 ierr = VecNormalize(quad_vecs[i],NULL);CHKERRQ(ierr); 115 } 116 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 123 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 124 { 125 PetscErrorCode ierr; 126 Vec local,global; 127 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 128 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 129 130 PetscFunctionBegin; 131 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 132 /* need to convert from global to local topology information and remove references to information in global ordering */ 133 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 134 if (pcbddc->user_provided_isfordofs) { 135 if (pcbddc->n_ISForDofs) { 136 PetscInt i; 137 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 138 for (i=0;i<pcbddc->n_ISForDofs;i++) { 139 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 140 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 141 } 142 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 143 pcbddc->n_ISForDofs = 0; 144 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 145 } 146 } else { 147 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 148 PetscInt i, n = matis->A->rmap->n; 149 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 150 if (i > 1) { 151 pcbddc->n_ISForDofsLocal = i; 152 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 153 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 154 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 155 } 156 } 157 } 158 } 159 160 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 161 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 162 } 163 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 164 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 165 } 166 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 167 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 168 } 169 ierr = VecDestroy(&global);CHKERRQ(ierr); 170 ierr = VecDestroy(&local);CHKERRQ(ierr); 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 176 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 177 { 178 PC_IS *pcis = (PC_IS*)(pc->data); 179 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 if (!pcbddc->benign_have_null) { 184 PetscFunctionReturn(0); 185 } 186 if (pcbddc->ChangeOfBasisMatrix) { 187 Vec swap; 188 189 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 190 swap = pcbddc->work_change; 191 pcbddc->work_change = r; 192 r = swap; 193 } 194 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 195 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 196 if (pcbddc->benign_n) { 197 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 198 } else { 199 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 200 } 201 ierr = VecSet(z,0.);CHKERRQ(ierr); 202 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 203 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 204 if (pcbddc->ChangeOfBasisMatrix) { 205 Vec swap; 206 207 swap = r; 208 r = pcbddc->work_change; 209 pcbddc->work_change = swap; 210 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 211 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 212 } 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 218 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 219 { 220 PCBDDCBenignMatMult_ctx ctx; 221 PetscErrorCode ierr; 222 PetscBool apply_right,apply_left,reset_x; 223 224 PetscFunctionBegin; 225 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 226 if (transpose) { 227 apply_right = ctx->apply_left; 228 apply_left = ctx->apply_right; 229 } else { 230 apply_right = ctx->apply_right; 231 apply_left = ctx->apply_left; 232 } 233 reset_x = PETSC_FALSE; 234 if (apply_right) { 235 const PetscScalar *ax; 236 PetscInt nl,i; 237 238 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 239 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 240 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 241 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 242 for (i=0;i<ctx->benign_n;i++) { 243 PetscScalar sum,val; 244 const PetscInt *idxs; 245 PetscInt nz,j; 246 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 247 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 248 sum = 0.; 249 if (ctx->apply_p0) { 250 val = ctx->work[idxs[nz-1]]; 251 for (j=0;j<nz-1;j++) { 252 sum += ctx->work[idxs[j]]; 253 ctx->work[idxs[j]] += val; 254 } 255 } else { 256 for (j=0;j<nz-1;j++) { 257 sum += ctx->work[idxs[j]]; 258 } 259 } 260 ctx->work[idxs[nz-1]] -= sum; 261 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 262 } 263 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 264 reset_x = PETSC_TRUE; 265 } 266 if (transpose) { 267 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 268 } else { 269 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 270 } 271 if (reset_x) { 272 ierr = VecResetArray(x);CHKERRQ(ierr); 273 } 274 if (apply_left) { 275 PetscScalar *ay; 276 PetscInt i; 277 278 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 279 for (i=0;i<ctx->benign_n;i++) { 280 PetscScalar sum,val; 281 const PetscInt *idxs; 282 PetscInt nz,j; 283 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 284 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 285 val = -ay[idxs[nz-1]]; 286 if (ctx->apply_p0) { 287 sum = 0.; 288 for (j=0;j<nz-1;j++) { 289 sum += ay[idxs[j]]; 290 ay[idxs[j]] += val; 291 } 292 ay[idxs[nz-1]] += sum; 293 } else { 294 for (j=0;j<nz-1;j++) { 295 ay[idxs[j]] += val; 296 } 297 ay[idxs[nz-1]] = 0.; 298 } 299 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 300 } 301 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 302 } 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 308 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 309 { 310 PetscErrorCode ierr; 311 312 PetscFunctionBegin; 313 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 319 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 320 { 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "PCBDDCBenignShellMat" 330 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 331 { 332 PC_IS *pcis = (PC_IS*)pc->data; 333 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 334 PCBDDCBenignMatMult_ctx ctx; 335 PetscErrorCode ierr; 336 337 PetscFunctionBegin; 338 if (!restore) { 339 Mat A_IB,A_BI; 340 PetscScalar *work; 341 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 342 343 if (pcbddc->benign_original_mat) { 344 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 345 } 346 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 347 PetscFunctionReturn(0); 348 } 349 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 350 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 351 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 352 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 353 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 354 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 355 ierr = PetscNew(&ctx);CHKERRQ(ierr); 356 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 357 ctx->apply_left = PETSC_TRUE; 358 ctx->apply_right = PETSC_FALSE; 359 ctx->apply_p0 = PETSC_FALSE; 360 ctx->benign_n = pcbddc->benign_n; 361 if (reuse) { 362 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 363 ctx->free = PETSC_FALSE; 364 } else { /* TODO: could be optimized for successive solves */ 365 ISLocalToGlobalMapping N_to_D; 366 PetscInt i; 367 368 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 369 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 370 for (i=0;i<pcbddc->benign_n;i++) { 371 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 372 } 373 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 374 ctx->free = PETSC_TRUE; 375 } 376 ctx->A = pcis->A_IB; 377 ctx->work = work; 378 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 379 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 380 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 381 pcis->A_IB = A_IB; 382 383 /* A_BI as A_IB^T */ 384 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 385 pcbddc->benign_original_mat = pcis->A_BI; 386 pcis->A_BI = A_BI; 387 } else { 388 if (!pcbddc->benign_original_mat) { 389 PetscFunctionReturn(0); 390 } 391 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 392 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 393 pcis->A_IB = ctx->A; 394 ctx->A = NULL; 395 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 396 pcis->A_BI = pcbddc->benign_original_mat; 397 pcbddc->benign_original_mat = NULL; 398 if (ctx->free) { 399 PetscInt i; 400 for (i=0;i<ctx->benign_n;i++) { 401 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 402 } 403 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 404 } 405 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 406 ierr = PetscFree(ctx);CHKERRQ(ierr); 407 } 408 PetscFunctionReturn(0); 409 } 410 411 /* used just in bddc debug mode */ 412 #undef __FUNCT__ 413 #define __FUNCT__ "PCBDDCBenignProject" 414 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 415 { 416 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 417 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 418 Mat An; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 423 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 424 if (is1) { 425 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 426 ierr = MatDestroy(&An);CHKERRQ(ierr); 427 } else { 428 *B = An; 429 } 430 PetscFunctionReturn(0); 431 } 432 433 /* TODO: add reuse flag */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatSeqAIJCompress" 436 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 437 { 438 Mat Bt; 439 PetscScalar *a,*bdata; 440 const PetscInt *ii,*ij; 441 PetscInt m,n,i,nnz,*bii,*bij; 442 PetscBool flg_row; 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 447 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 448 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 449 nnz = n; 450 for (i=0;i<ii[n];i++) { 451 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 452 } 453 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 454 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 455 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 456 nnz = 0; 457 bii[0] = 0; 458 for (i=0;i<n;i++) { 459 PetscInt j; 460 for (j=ii[i];j<ii[i+1];j++) { 461 PetscScalar entry = a[j]; 462 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 463 bij[nnz] = ij[j]; 464 bdata[nnz] = entry; 465 nnz++; 466 } 467 } 468 bii[i+1] = nnz; 469 } 470 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 471 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 472 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 473 { 474 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 475 b->free_a = PETSC_TRUE; 476 b->free_ij = PETSC_TRUE; 477 } 478 *B = Bt; 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatDetectDisconnectedComponents" 484 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 485 { 486 Mat B; 487 IS is_dummy,*cc_n; 488 ISLocalToGlobalMapping l2gmap_dummy; 489 PCBDDCGraph graph; 490 PetscInt i,n; 491 PetscInt *xadj,*adjncy; 492 PetscInt *xadj_filtered,*adjncy_filtered; 493 PetscBool flg_row,isseqaij; 494 PetscErrorCode ierr; 495 496 PetscFunctionBegin; 497 if (!A->rmap->N || !A->cmap->N) { 498 *ncc = 0; 499 *cc = NULL; 500 PetscFunctionReturn(0); 501 } 502 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 503 if (!isseqaij && filter) { 504 PetscBool isseqdense; 505 506 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 507 if (!isseqdense) { 508 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 509 } else { /* TODO: rectangular case and LDA */ 510 PetscScalar *array; 511 PetscReal chop=1.e-6; 512 513 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 514 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 515 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 516 for (i=0;i<n;i++) { 517 PetscInt j; 518 for (j=i+1;j<n;j++) { 519 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 520 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 521 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 522 } 523 } 524 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 525 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 526 } 527 } else { 528 B = A; 529 } 530 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 531 532 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 533 if (filter) { 534 PetscScalar *data; 535 PetscInt j,cum; 536 537 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 538 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 539 cum = 0; 540 for (i=0;i<n;i++) { 541 PetscInt t; 542 543 for (j=xadj[i];j<xadj[i+1];j++) { 544 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 545 continue; 546 } 547 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 548 } 549 t = xadj_filtered[i]; 550 xadj_filtered[i] = cum; 551 cum += t; 552 } 553 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 554 } else { 555 xadj_filtered = NULL; 556 adjncy_filtered = NULL; 557 } 558 559 /* compute local connected components using PCBDDCGraph */ 560 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 561 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 562 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 563 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 564 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 565 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 566 if (xadj_filtered) { 567 graph->xadj = xadj_filtered; 568 graph->adjncy = adjncy_filtered; 569 } else { 570 graph->xadj = xadj; 571 graph->adjncy = adjncy; 572 } 573 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 574 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 575 /* partial clean up */ 576 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 577 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 578 if (A != B) { 579 ierr = MatDestroy(&B);CHKERRQ(ierr); 580 } 581 582 /* get back data */ 583 if (ncc) *ncc = graph->ncc; 584 if (cc) { 585 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 586 for (i=0;i<graph->ncc;i++) { 587 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 588 } 589 *cc = cc_n; 590 } 591 /* clean up graph */ 592 graph->xadj = 0; 593 graph->adjncy = 0; 594 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "PCBDDCBenignCheck" 600 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 601 { 602 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 603 PC_IS* pcis = (PC_IS*)(pc->data); 604 IS dirIS = NULL; 605 PetscInt i; 606 PetscErrorCode ierr; 607 608 PetscFunctionBegin; 609 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 610 if (zerodiag) { 611 Mat A; 612 Vec vec3_N; 613 PetscScalar *vals; 614 const PetscInt *idxs; 615 PetscInt nz,*count; 616 617 /* p0 */ 618 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 619 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 621 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 622 for (i=0;i<nz;i++) vals[i] = 1.; 623 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 624 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 625 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 626 /* v_I */ 627 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 628 for (i=0;i<nz;i++) vals[i] = 0.; 629 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 630 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 631 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 632 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 633 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 634 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 635 if (dirIS) { 636 PetscInt n; 637 638 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 639 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 640 for (i=0;i<n;i++) vals[i] = 0.; 641 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 643 } 644 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 645 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 646 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 647 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 648 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 649 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 650 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 651 if (PetscAbsScalar(vals[0]) > 1.e-1) { 652 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 653 } 654 ierr = PetscFree(vals);CHKERRQ(ierr); 655 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 656 657 /* there should not be any pressure dofs lying on the interface */ 658 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 659 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 660 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 661 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 662 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 663 for (i=0;i<nz;i++) { 664 if (count[idxs[i]]) { 665 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 666 } 667 } 668 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 669 ierr = PetscFree(count);CHKERRQ(ierr); 670 } 671 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 672 673 /* check PCBDDCBenignGetOrSetP0 */ 674 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 675 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 676 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 677 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 678 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 679 for (i=0;i<pcbddc->benign_n;i++) { 680 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 681 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 682 } 683 } 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 689 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 690 { 691 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 692 IS pressures,zerodiag,*zerodiag_subs; 693 PetscInt nz,n; 694 PetscInt *interior_dofs,n_interior_dofs; 695 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 701 for (n=0;n<pcbddc->benign_n;n++) { 702 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 703 } 704 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 705 pcbddc->benign_n = 0; 706 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 707 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 708 Checks if all the pressure dofs in each subdomain have a zero diagonal 709 If not, a change of basis on pressures is not needed 710 since the local Schur complements are SPD 711 */ 712 has_null_pressures = PETSC_TRUE; 713 have_null = PETSC_TRUE; 714 if (pcbddc->n_ISForDofsLocal) { 715 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 716 717 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 718 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 719 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 720 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 721 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 722 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 723 if (!sorted) { 724 ierr = ISSort(pressures);CHKERRQ(ierr); 725 } 726 } else { 727 pressures = NULL; 728 } 729 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 730 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 731 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 732 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 733 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 734 if (!sorted) { 735 ierr = ISSort(zerodiag);CHKERRQ(ierr); 736 } 737 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 738 if (!nz) { 739 if (n) have_null = PETSC_FALSE; 740 has_null_pressures = PETSC_FALSE; 741 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 742 } 743 recompute_zerodiag = PETSC_FALSE; 744 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 745 zerodiag_subs = NULL; 746 pcbddc->benign_n = 0; 747 n_interior_dofs = 0; 748 interior_dofs = NULL; 749 if (pcbddc->current_level) { /* need to compute interior nodes */ 750 PetscInt n,i,j; 751 PetscInt n_neigh,*neigh,*n_shared,**shared; 752 PetscInt *iwork; 753 754 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 755 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 756 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 757 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 758 for (i=0;i<n_neigh;i++) 759 for (j=0;j<n_shared[i];j++) 760 iwork[shared[i][j]] += 1; 761 for (i=0;i<n;i++) 762 if (!iwork[i]) 763 interior_dofs[n_interior_dofs++] = i; 764 ierr = PetscFree(iwork);CHKERRQ(ierr); 765 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 766 } 767 if (has_null_pressures) { 768 IS *subs; 769 PetscInt nsubs,i,j,nl; 770 const PetscInt *idxs; 771 PetscScalar *array; 772 Vec *work; 773 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 774 775 subs = pcbddc->local_subs; 776 nsubs = pcbddc->n_local_subs; 777 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 778 if (pcbddc->current_level) { 779 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 780 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 781 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 782 /* work[0] = 1_p */ 783 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 784 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 785 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 786 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 787 /* work[0] = 1_v */ 788 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 789 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 790 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 791 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 792 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 793 } 794 if (nsubs > 1) { 795 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 796 for (i=0;i<nsubs;i++) { 797 ISLocalToGlobalMapping l2g; 798 IS t_zerodiag_subs; 799 PetscInt nl; 800 801 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 802 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 803 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 804 if (nl) { 805 PetscBool valid = PETSC_TRUE; 806 807 if (pcbddc->current_level) { 808 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 810 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 811 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 812 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 813 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 814 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 815 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 816 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 817 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 818 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 819 for (j=0;j<n_interior_dofs;j++) { 820 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 821 valid = PETSC_FALSE; 822 break; 823 } 824 } 825 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 826 } 827 if (valid && pcbddc->NeumannBoundariesLocal) { 828 IS t_bc; 829 PetscInt nzb; 830 831 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 832 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 833 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 834 if (nzb) valid = PETSC_FALSE; 835 } 836 if (valid && pressures) { 837 IS t_pressure_subs; 838 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 839 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 840 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 841 } 842 if (valid) { 843 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 844 pcbddc->benign_n++; 845 } else { 846 recompute_zerodiag = PETSC_TRUE; 847 } 848 } 849 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 850 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 851 } 852 } else { /* there's just one subdomain (or zero if they have not been detected */ 853 PetscBool valid = PETSC_TRUE; 854 855 if (pcbddc->NeumannBoundariesLocal) { 856 PetscInt nzb; 857 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 858 if (nzb) valid = PETSC_FALSE; 859 } 860 if (valid && pressures) { 861 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 862 } 863 if (valid && pcbddc->current_level) { 864 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 865 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 866 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 867 for (j=0;j<n_interior_dofs;j++) { 868 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 869 valid = PETSC_FALSE; 870 break; 871 } 872 } 873 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 874 } 875 if (valid) { 876 pcbddc->benign_n = 1; 877 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 878 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 879 zerodiag_subs[0] = zerodiag; 880 } 881 } 882 if (pcbddc->current_level) { 883 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 884 } 885 } 886 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 887 888 if (!pcbddc->benign_n) { 889 PetscInt n; 890 891 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 892 recompute_zerodiag = PETSC_FALSE; 893 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 894 if (n) { 895 has_null_pressures = PETSC_FALSE; 896 have_null = PETSC_FALSE; 897 } 898 } 899 900 /* final check for null pressures */ 901 if (zerodiag && pressures) { 902 PetscInt nz,np; 903 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 904 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 905 if (nz != np) have_null = PETSC_FALSE; 906 } 907 908 if (recompute_zerodiag) { 909 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 910 if (pcbddc->benign_n == 1) { 911 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 912 zerodiag = zerodiag_subs[0]; 913 } else { 914 PetscInt i,nzn,*new_idxs; 915 916 nzn = 0; 917 for (i=0;i<pcbddc->benign_n;i++) { 918 PetscInt ns; 919 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 920 nzn += ns; 921 } 922 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 923 nzn = 0; 924 for (i=0;i<pcbddc->benign_n;i++) { 925 PetscInt ns,*idxs; 926 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 927 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 928 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 929 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 930 nzn += ns; 931 } 932 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 933 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 934 } 935 have_null = PETSC_FALSE; 936 } 937 938 /* Prepare matrix to compute no-net-flux */ 939 if (pcbddc->benign_compute_nonetflux && !pcbddc->divudotp) { 940 Mat A; 941 IS isused = NULL; 942 if (pressures) { 943 isused = pressures; 944 } else { 945 isused = zerodiag; 946 } 947 if (!isused) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 948 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 949 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&pcbddc->divudotp);CHKERRQ(ierr); 950 } 951 952 /* change of basis and p0 dofs */ 953 if (has_null_pressures) { 954 IS zerodiagc; 955 const PetscInt *idxs,*idxsc; 956 PetscInt i,s,*nnz; 957 958 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 959 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 960 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 961 /* local change of basis for pressures */ 962 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 963 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 964 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 965 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 966 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 967 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 968 for (i=0;i<pcbddc->benign_n;i++) { 969 PetscInt nzs,j; 970 971 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 972 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 973 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 974 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 975 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 976 } 977 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 978 ierr = PetscFree(nnz);CHKERRQ(ierr); 979 /* set identity on velocities */ 980 for (i=0;i<n-nz;i++) { 981 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 982 } 983 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 984 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 985 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 986 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 987 /* set change on pressures */ 988 for (s=0;s<pcbddc->benign_n;s++) { 989 PetscScalar *array; 990 PetscInt nzs; 991 992 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 993 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 994 for (i=0;i<nzs-1;i++) { 995 PetscScalar vals[2]; 996 PetscInt cols[2]; 997 998 cols[0] = idxs[i]; 999 cols[1] = idxs[nzs-1]; 1000 vals[0] = 1.; 1001 vals[1] = 1.; 1002 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1003 } 1004 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1005 for (i=0;i<nzs-1;i++) array[i] = -1.; 1006 array[nzs-1] = 1.; 1007 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1008 /* store local idxs for p0 */ 1009 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1010 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1011 ierr = PetscFree(array);CHKERRQ(ierr); 1012 } 1013 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1014 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1015 /* project if needed */ 1016 if (pcbddc->benign_change_explicit) { 1017 Mat M; 1018 1019 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1020 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1021 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1022 ierr = MatDestroy(&M);CHKERRQ(ierr); 1023 } 1024 /* store global idxs for p0 */ 1025 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1026 } 1027 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1028 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1029 1030 /* determines if the coarse solver will be singular or not */ 1031 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1032 /* determines if the problem has subdomains with 0 pressure block */ 1033 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1034 *zerodiaglocal = zerodiag; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1040 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1041 { 1042 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1043 PetscScalar *array; 1044 PetscErrorCode ierr; 1045 1046 PetscFunctionBegin; 1047 if (!pcbddc->benign_sf) { 1048 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1049 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1050 } 1051 if (get) { 1052 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1053 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1054 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1056 } else { 1057 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1058 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1059 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1060 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1067 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1068 { 1069 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1070 PetscErrorCode ierr; 1071 1072 PetscFunctionBegin; 1073 /* TODO: add error checking 1074 - avoid nested pop (or push) calls. 1075 - cannot push before pop. 1076 - cannot call this if pcbddc->local_mat is NULL 1077 */ 1078 if (!pcbddc->benign_n) { 1079 PetscFunctionReturn(0); 1080 } 1081 if (pop) { 1082 if (pcbddc->benign_change_explicit) { 1083 IS is_p0; 1084 MatReuse reuse; 1085 1086 /* extract B_0 */ 1087 reuse = MAT_INITIAL_MATRIX; 1088 if (pcbddc->benign_B0) { 1089 reuse = MAT_REUSE_MATRIX; 1090 } 1091 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1093 /* remove rows and cols from local problem */ 1094 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1095 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1096 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1097 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1098 } else { 1099 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1100 PetscScalar *vals; 1101 PetscInt i,n,*idxs_ins; 1102 1103 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1104 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1105 if (!pcbddc->benign_B0) { 1106 PetscInt *nnz; 1107 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1108 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1109 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1110 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1111 for (i=0;i<pcbddc->benign_n;i++) { 1112 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1113 nnz[i] = n - nnz[i]; 1114 } 1115 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1116 ierr = PetscFree(nnz);CHKERRQ(ierr); 1117 } 1118 1119 for (i=0;i<pcbddc->benign_n;i++) { 1120 PetscScalar *array; 1121 PetscInt *idxs,j,nz,cum; 1122 1123 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1124 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1125 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1126 for (j=0;j<nz;j++) vals[j] = 1.; 1127 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1128 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1129 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1130 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1131 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1132 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1133 cum = 0; 1134 for (j=0;j<n;j++) { 1135 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1136 vals[cum] = array[j]; 1137 idxs_ins[cum] = j; 1138 cum++; 1139 } 1140 } 1141 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1142 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1143 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1144 } 1145 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1146 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1147 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1148 } 1149 } else { /* push */ 1150 if (pcbddc->benign_change_explicit) { 1151 PetscInt i; 1152 1153 for (i=0;i<pcbddc->benign_n;i++) { 1154 PetscScalar *B0_vals; 1155 PetscInt *B0_cols,B0_ncol; 1156 1157 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1158 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1159 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1160 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1161 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1162 } 1163 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 } else { 1166 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1167 } 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1174 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1175 { 1176 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1177 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1178 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1179 PetscBLASInt *B_iwork,*B_ifail; 1180 PetscScalar *work,lwork; 1181 PetscScalar *St,*S,*eigv; 1182 PetscScalar *Sarray,*Starray; 1183 PetscReal *eigs,thresh; 1184 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1185 PetscBool allocated_S_St; 1186 #if defined(PETSC_USE_COMPLEX) 1187 PetscReal *rwork; 1188 #endif 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1193 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1194 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1195 1196 if (pcbddc->dbg_flag) { 1197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1200 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 } 1202 1203 if (pcbddc->dbg_flag) { 1204 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1205 } 1206 1207 /* max size of subsets */ 1208 mss = 0; 1209 for (i=0;i<sub_schurs->n_subs;i++) { 1210 PetscInt subset_size; 1211 1212 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1213 mss = PetscMax(mss,subset_size); 1214 } 1215 1216 /* min/max and threshold */ 1217 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1218 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1219 nmax = PetscMax(nmin,nmax); 1220 allocated_S_St = PETSC_FALSE; 1221 if (nmin) { 1222 allocated_S_St = PETSC_TRUE; 1223 } 1224 1225 /* allocate lapack workspace */ 1226 cum = cum2 = 0; 1227 maxneigs = 0; 1228 for (i=0;i<sub_schurs->n_subs;i++) { 1229 PetscInt n,subset_size; 1230 1231 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1232 n = PetscMin(subset_size,nmax); 1233 cum += subset_size; 1234 cum2 += subset_size*n; 1235 maxneigs = PetscMax(maxneigs,n); 1236 } 1237 if (mss) { 1238 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1239 PetscBLASInt B_itype = 1; 1240 PetscBLASInt B_N = mss; 1241 PetscReal zero = 0.0; 1242 PetscReal eps = 0.0; /* dlamch? */ 1243 1244 B_lwork = -1; 1245 S = NULL; 1246 St = NULL; 1247 eigs = NULL; 1248 eigv = NULL; 1249 B_iwork = NULL; 1250 B_ifail = NULL; 1251 #if defined(PETSC_USE_COMPLEX) 1252 rwork = NULL; 1253 #endif 1254 thresh = 1.0; 1255 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1256 #if defined(PETSC_USE_COMPLEX) 1257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1258 #else 1259 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1260 #endif 1261 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1262 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1263 } else { 1264 /* TODO */ 1265 } 1266 } else { 1267 lwork = 0; 1268 } 1269 1270 nv = 0; 1271 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1272 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1273 } 1274 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1275 if (allocated_S_St) { 1276 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1277 } 1278 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1279 #if defined(PETSC_USE_COMPLEX) 1280 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1283 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1284 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1285 nv+cum,&pcbddc->adaptive_constraints_idxs, 1286 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1287 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1288 1289 maxneigs = 0; 1290 cum = cumarray = 0; 1291 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1292 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1293 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1294 const PetscInt *idxs; 1295 1296 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1297 for (cum=0;cum<nv;cum++) { 1298 pcbddc->adaptive_constraints_n[cum] = 1; 1299 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1300 pcbddc->adaptive_constraints_data[cum] = 1.0; 1301 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1302 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1303 } 1304 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1305 } 1306 1307 if (mss) { /* multilevel */ 1308 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1309 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1310 } 1311 1312 thresh = pcbddc->adaptive_threshold; 1313 for (i=0;i<sub_schurs->n_subs;i++) { 1314 const PetscInt *idxs; 1315 PetscReal upper,lower; 1316 PetscInt j,subset_size,eigs_start = 0; 1317 PetscBLASInt B_N; 1318 PetscBool same_data = PETSC_FALSE; 1319 1320 if (pcbddc->use_deluxe_scaling) { 1321 upper = PETSC_MAX_REAL; 1322 lower = thresh; 1323 } else { 1324 upper = 1./thresh; 1325 lower = 0.; 1326 } 1327 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1328 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1329 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1330 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1331 if (sub_schurs->is_hermitian) { 1332 PetscInt j,k; 1333 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1334 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1335 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1336 } 1337 for (j=0;j<subset_size;j++) { 1338 for (k=j;k<subset_size;k++) { 1339 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1340 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1341 } 1342 } 1343 } else { 1344 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1345 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1346 } 1347 } else { 1348 S = Sarray + cumarray; 1349 St = Starray + cumarray; 1350 } 1351 /* see if we can save some work */ 1352 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1353 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1354 } 1355 1356 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1357 B_neigs = 0; 1358 } else { 1359 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1360 PetscBLASInt B_itype = 1; 1361 PetscBLASInt B_IL, B_IU; 1362 PetscReal eps = -1.0; /* dlamch? */ 1363 PetscInt nmin_s; 1364 PetscBool compute_range = PETSC_FALSE; 1365 1366 if (pcbddc->dbg_flag) { 1367 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1368 } 1369 1370 compute_range = PETSC_FALSE; 1371 if (thresh > 1.+PETSC_SMALL && !same_data) { 1372 compute_range = PETSC_TRUE; 1373 } 1374 1375 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1376 if (compute_range) { 1377 1378 /* ask for eigenvalues larger than thresh */ 1379 #if defined(PETSC_USE_COMPLEX) 1380 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1381 #else 1382 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1383 #endif 1384 } else if (!same_data) { 1385 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1386 B_IL = 1; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1389 #else 1390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1391 #endif 1392 } else { /* same_data is true, so get the adaptive function requested by the user */ 1393 PetscInt k; 1394 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1395 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1396 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1397 nmin = nmax; 1398 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1399 for (k=0;k<nmax;k++) { 1400 eigs[k] = 1./PETSC_SMALL; 1401 eigv[k*(subset_size+1)] = 1.0; 1402 } 1403 } 1404 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1405 if (B_ierr) { 1406 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1407 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1408 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1409 } 1410 1411 if (B_neigs > nmax) { 1412 if (pcbddc->dbg_flag) { 1413 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1414 } 1415 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1416 B_neigs = nmax; 1417 } 1418 1419 nmin_s = PetscMin(nmin,B_N); 1420 if (B_neigs < nmin_s) { 1421 PetscBLASInt B_neigs2; 1422 1423 if (pcbddc->use_deluxe_scaling) { 1424 B_IL = B_N - nmin_s + 1; 1425 B_IU = B_N - B_neigs; 1426 } else { 1427 B_IL = B_neigs + 1; 1428 B_IU = nmin_s; 1429 } 1430 if (pcbddc->dbg_flag) { 1431 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1432 } 1433 if (sub_schurs->is_hermitian) { 1434 PetscInt j; 1435 for (j=0;j<subset_size;j++) { 1436 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1437 } 1438 for (j=0;j<subset_size;j++) { 1439 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1440 } 1441 } else { 1442 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1443 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1444 } 1445 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1446 #if defined(PETSC_USE_COMPLEX) 1447 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1448 #else 1449 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1450 #endif 1451 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1452 B_neigs += B_neigs2; 1453 } 1454 if (B_ierr) { 1455 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1456 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1457 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1458 } 1459 if (pcbddc->dbg_flag) { 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1461 for (j=0;j<B_neigs;j++) { 1462 if (eigs[j] == 0.0) { 1463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1464 } else { 1465 if (pcbddc->use_deluxe_scaling) { 1466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1467 } else { 1468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1469 } 1470 } 1471 } 1472 } 1473 } else { 1474 /* TODO */ 1475 } 1476 } 1477 /* change the basis back to the original one */ 1478 if (sub_schurs->change) { 1479 Mat change,phi,phit; 1480 1481 if (pcbddc->dbg_flag > 1) { 1482 PetscInt ii; 1483 for (ii=0;ii<B_neigs;ii++) { 1484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1485 for (j=0;j<B_N;j++) { 1486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1487 } 1488 } 1489 } 1490 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1491 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1492 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1493 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1494 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1495 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1496 } 1497 maxneigs = PetscMax(B_neigs,maxneigs); 1498 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1499 if (B_neigs) { 1500 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1501 1502 if (pcbddc->dbg_flag > 1) { 1503 PetscInt ii; 1504 for (ii=0;ii<B_neigs;ii++) { 1505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1506 for (j=0;j<B_N;j++) { 1507 #if defined(PETSC_USE_COMPLEX) 1508 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1509 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1511 #else 1512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1513 #endif 1514 } 1515 } 1516 } 1517 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1518 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1519 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1520 cum++; 1521 } 1522 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1523 /* shift for next computation */ 1524 cumarray += subset_size*subset_size; 1525 } 1526 if (pcbddc->dbg_flag) { 1527 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1528 } 1529 1530 if (mss) { 1531 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1532 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1533 /* destroy matrices (junk) */ 1534 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1535 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1536 } 1537 if (allocated_S_St) { 1538 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1539 } 1540 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1541 #if defined(PETSC_USE_COMPLEX) 1542 ierr = PetscFree(rwork);CHKERRQ(ierr); 1543 #endif 1544 if (pcbddc->dbg_flag) { 1545 PetscInt maxneigs_r; 1546 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1547 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1548 } 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "PCBDDCSetUpSolvers" 1554 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1555 { 1556 PetscScalar *coarse_submat_vals; 1557 PetscErrorCode ierr; 1558 1559 PetscFunctionBegin; 1560 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1561 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1562 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1563 1564 /* Setup local neumann solver ksp_R */ 1565 /* PCBDDCSetUpLocalScatters should be called first! */ 1566 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1567 1568 /* 1569 Setup local correction and local part of coarse basis. 1570 Gives back the dense local part of the coarse matrix in column major ordering 1571 */ 1572 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1573 1574 /* Compute total number of coarse nodes and setup coarse solver */ 1575 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1576 1577 /* free */ 1578 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "PCBDDCResetCustomization" 1584 PetscErrorCode PCBDDCResetCustomization(PC pc) 1585 { 1586 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1587 PetscErrorCode ierr; 1588 1589 PetscFunctionBegin; 1590 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1591 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1592 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1593 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1594 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1595 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1596 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1597 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1598 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1599 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1600 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "PCBDDCResetTopography" 1606 PetscErrorCode PCBDDCResetTopography(PC pc) 1607 { 1608 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1609 PetscInt i; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1614 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1615 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1616 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1617 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1618 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1619 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1620 for (i=0;i<pcbddc->n_local_subs;i++) { 1621 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1622 } 1623 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1624 if (pcbddc->sub_schurs) { 1625 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "PCBDDCResetSolvers" 1632 PetscErrorCode PCBDDCResetSolvers(PC pc) 1633 { 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 PetscErrorCode ierr; 1636 1637 PetscFunctionBegin; 1638 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1639 if (pcbddc->coarse_phi_B) { 1640 PetscScalar *array; 1641 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1642 ierr = PetscFree(array);CHKERRQ(ierr); 1643 } 1644 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1645 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1646 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1647 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1648 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1649 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1650 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1651 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1652 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1653 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1654 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1655 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1656 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1657 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1658 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1659 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1660 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1661 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1662 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1663 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1664 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1665 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1666 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1667 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1668 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1669 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1670 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1671 if (pcbddc->benign_zerodiag_subs) { 1672 PetscInt i; 1673 for (i=0;i<pcbddc->benign_n;i++) { 1674 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1675 } 1676 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1677 } 1678 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1684 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1685 { 1686 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1687 PC_IS *pcis = (PC_IS*)pc->data; 1688 VecType impVecType; 1689 PetscInt n_constraints,n_R,old_size; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1694 /* get sizes */ 1695 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1696 n_R = pcis->n - pcbddc->n_vertices; 1697 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1698 /* local work vectors (try to avoid unneeded work)*/ 1699 /* R nodes */ 1700 old_size = -1; 1701 if (pcbddc->vec1_R) { 1702 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1703 } 1704 if (n_R != old_size) { 1705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1707 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1708 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1709 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1710 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1711 } 1712 /* local primal dofs */ 1713 old_size = -1; 1714 if (pcbddc->vec1_P) { 1715 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1716 } 1717 if (pcbddc->local_primal_size != old_size) { 1718 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1719 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1720 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1721 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1722 } 1723 /* local explicit constraints */ 1724 old_size = -1; 1725 if (pcbddc->vec1_C) { 1726 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1727 } 1728 if (n_constraints && n_constraints != old_size) { 1729 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1730 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1731 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1732 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "PCBDDCSetUpCorrection" 1739 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1740 { 1741 PetscErrorCode ierr; 1742 /* pointers to pcis and pcbddc */ 1743 PC_IS* pcis = (PC_IS*)pc->data; 1744 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1745 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1746 /* submatrices of local problem */ 1747 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1748 /* submatrices of local coarse problem */ 1749 Mat S_VV,S_CV,S_VC,S_CC; 1750 /* working matrices */ 1751 Mat C_CR; 1752 /* additional working stuff */ 1753 PC pc_R; 1754 Mat F; 1755 Vec dummy_vec; 1756 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1757 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1758 PetscScalar *work; 1759 PetscInt *idx_V_B; 1760 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1761 PetscInt i,n_R,n_D,n_B; 1762 1763 /* some shortcuts to scalars */ 1764 PetscScalar one=1.0,m_one=-1.0; 1765 1766 PetscFunctionBegin; 1767 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1768 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1769 } 1770 1771 /* Set Non-overlapping dimensions */ 1772 n_vertices = pcbddc->n_vertices; 1773 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1774 n_B = pcis->n_B; 1775 n_D = pcis->n - n_B; 1776 n_R = pcis->n - n_vertices; 1777 1778 /* vertices in boundary numbering */ 1779 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1780 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1781 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1782 1783 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1784 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1786 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1787 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1788 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1790 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1791 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1792 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1793 1794 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1795 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1796 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1797 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1798 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1799 lda_rhs = n_R; 1800 need_benign_correction = PETSC_FALSE; 1801 if (isLU || isILU || isCHOL) { 1802 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1803 } else if (sub_schurs && sub_schurs->reuse_solver) { 1804 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1805 MatFactorType type; 1806 1807 F = reuse_solver->F; 1808 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1809 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1810 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1811 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1812 } else { 1813 F = NULL; 1814 } 1815 1816 /* allocate workspace */ 1817 n = 0; 1818 if (n_constraints) { 1819 n += lda_rhs*n_constraints; 1820 } 1821 if (n_vertices) { 1822 n = PetscMax(2*lda_rhs*n_vertices,n); 1823 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1824 } 1825 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1826 1827 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1828 dummy_vec = NULL; 1829 if (need_benign_correction && lda_rhs != n_R && F) { 1830 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1831 } 1832 1833 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1834 if (n_constraints) { 1835 Mat M1,M2,M3,C_B; 1836 IS is_aux; 1837 PetscScalar *array,*array2; 1838 1839 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1840 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1841 1842 /* Extract constraints on R nodes: C_{CR} */ 1843 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1844 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1845 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1846 1847 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1848 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1849 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1850 for (i=0;i<n_constraints;i++) { 1851 const PetscScalar *row_cmat_values; 1852 const PetscInt *row_cmat_indices; 1853 PetscInt size_of_constraint,j; 1854 1855 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1856 for (j=0;j<size_of_constraint;j++) { 1857 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1858 } 1859 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1860 } 1861 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1862 if (F) { 1863 Mat B; 1864 1865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1866 if (need_benign_correction) { 1867 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1868 1869 /* rhs is already zero on interior dofs, no need to change the rhs */ 1870 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1871 } 1872 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1873 if (need_benign_correction) { 1874 PetscScalar *marr; 1875 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1876 1877 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1878 if (lda_rhs != n_R) { 1879 for (i=0;i<n_constraints;i++) { 1880 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1881 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1882 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1883 } 1884 } else { 1885 for (i=0;i<n_constraints;i++) { 1886 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1887 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1888 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1889 } 1890 } 1891 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1892 } 1893 ierr = MatDestroy(&B);CHKERRQ(ierr); 1894 } else { 1895 PetscScalar *marr; 1896 1897 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1898 for (i=0;i<n_constraints;i++) { 1899 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1900 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1901 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1902 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1903 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1904 } 1905 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1906 } 1907 if (!pcbddc->switch_static) { 1908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1909 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1910 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1911 for (i=0;i<n_constraints;i++) { 1912 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1913 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1916 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1917 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1918 } 1919 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1921 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1922 } else { 1923 if (lda_rhs != n_R) { 1924 IS dummy; 1925 1926 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1927 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1928 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1929 } else { 1930 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1931 pcbddc->local_auxmat2 = local_auxmat2_R; 1932 } 1933 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1934 } 1935 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1936 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1937 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1938 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1939 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1940 if (isCHOL) { 1941 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1942 } else { 1943 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1944 } 1945 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1946 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1947 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1948 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1949 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1950 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1951 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1952 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1953 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1954 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1955 } 1956 1957 /* Get submatrices from subdomain matrix */ 1958 if (n_vertices) { 1959 IS is_aux; 1960 1961 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1962 IS tis; 1963 1964 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1965 ierr = ISSort(tis);CHKERRQ(ierr); 1966 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1967 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1968 } else { 1969 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1970 } 1971 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1972 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1973 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1974 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1975 } 1976 1977 /* Matrix of coarse basis functions (local) */ 1978 if (pcbddc->coarse_phi_B) { 1979 PetscInt on_B,on_primal,on_D=n_D; 1980 if (pcbddc->coarse_phi_D) { 1981 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1982 } 1983 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1984 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1985 PetscScalar *marray; 1986 1987 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1988 ierr = PetscFree(marray);CHKERRQ(ierr); 1989 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1990 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1991 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1992 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1993 } 1994 } 1995 1996 if (!pcbddc->coarse_phi_B) { 1997 PetscScalar *marray; 1998 1999 n = n_B*pcbddc->local_primal_size; 2000 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2001 n += n_D*pcbddc->local_primal_size; 2002 } 2003 if (!pcbddc->symmetric_primal) { 2004 n *= 2; 2005 } 2006 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2007 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2008 n = n_B*pcbddc->local_primal_size; 2009 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2010 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2011 n += n_D*pcbddc->local_primal_size; 2012 } 2013 if (!pcbddc->symmetric_primal) { 2014 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2015 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2016 n = n_B*pcbddc->local_primal_size; 2017 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2018 } 2019 } else { 2020 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2021 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2022 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2023 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2024 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2025 } 2026 } 2027 } 2028 2029 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2030 p0_lidx_I = NULL; 2031 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2032 const PetscInt *idxs; 2033 2034 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2035 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2036 for (i=0;i<pcbddc->benign_n;i++) { 2037 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2038 } 2039 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2040 } 2041 2042 /* vertices */ 2043 if (n_vertices) { 2044 2045 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2046 2047 if (n_R) { 2048 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2049 PetscBLASInt B_N,B_one = 1; 2050 PetscScalar *x,*y; 2051 PetscBool isseqaij; 2052 2053 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2054 if (need_benign_correction) { 2055 ISLocalToGlobalMapping RtoN; 2056 IS is_p0; 2057 PetscInt *idxs_p0,n; 2058 2059 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2060 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2061 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2062 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2063 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2064 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2065 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2066 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2067 } 2068 2069 if (lda_rhs == n_R) { 2070 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2071 } else { 2072 PetscScalar *av,*array; 2073 const PetscInt *xadj,*adjncy; 2074 PetscInt n; 2075 PetscBool flg_row; 2076 2077 array = work+lda_rhs*n_vertices; 2078 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2079 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2080 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2081 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2082 for (i=0;i<n;i++) { 2083 PetscInt j; 2084 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2085 } 2086 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2087 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2089 } 2090 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2091 if (need_benign_correction) { 2092 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2093 PetscScalar *marr; 2094 2095 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2096 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2097 2098 | 0 0 0 | (V) 2099 L = | 0 0 -1 | (P-p0) 2100 | 0 0 -1 | (p0) 2101 2102 */ 2103 for (i=0;i<reuse_solver->benign_n;i++) { 2104 const PetscScalar *vals; 2105 const PetscInt *idxs,*idxs_zero; 2106 PetscInt n,j,nz; 2107 2108 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2109 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2110 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2111 for (j=0;j<n;j++) { 2112 PetscScalar val = vals[j]; 2113 PetscInt k,col = idxs[j]; 2114 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2115 } 2116 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2117 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2118 } 2119 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2120 } 2121 if (F) { 2122 /* need to correct the rhs */ 2123 if (need_benign_correction) { 2124 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2125 PetscScalar *marr; 2126 2127 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2128 if (lda_rhs != n_R) { 2129 for (i=0;i<n_vertices;i++) { 2130 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2131 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2132 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2133 } 2134 } else { 2135 for (i=0;i<n_vertices;i++) { 2136 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2137 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2138 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2139 } 2140 } 2141 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2142 } 2143 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2144 /* need to correct the solution */ 2145 if (need_benign_correction) { 2146 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2147 PetscScalar *marr; 2148 2149 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2150 if (lda_rhs != n_R) { 2151 for (i=0;i<n_vertices;i++) { 2152 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2153 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2154 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2155 } 2156 } else { 2157 for (i=0;i<n_vertices;i++) { 2158 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2159 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2160 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2161 } 2162 } 2163 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2164 } 2165 } else { 2166 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2167 for (i=0;i<n_vertices;i++) { 2168 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2169 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2170 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2171 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2172 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2173 } 2174 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2175 } 2176 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2177 /* S_VV and S_CV */ 2178 if (n_constraints) { 2179 Mat B; 2180 2181 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2182 for (i=0;i<n_vertices;i++) { 2183 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2185 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2186 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2188 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2189 } 2190 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2191 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2192 ierr = MatDestroy(&B);CHKERRQ(ierr); 2193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2194 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2195 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2196 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2197 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2198 ierr = MatDestroy(&B);CHKERRQ(ierr); 2199 } 2200 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2201 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2202 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2203 } 2204 if (lda_rhs != n_R) { 2205 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2207 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2208 } 2209 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2210 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2211 if (need_benign_correction) { 2212 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2213 PetscScalar *marr,*sums; 2214 2215 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2216 ierr = MatDenseGetArray(S_VVt,&marr); 2217 for (i=0;i<reuse_solver->benign_n;i++) { 2218 const PetscScalar *vals; 2219 const PetscInt *idxs,*idxs_zero; 2220 PetscInt n,j,nz; 2221 2222 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2223 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2224 for (j=0;j<n_vertices;j++) { 2225 PetscInt k; 2226 sums[j] = 0.; 2227 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2228 } 2229 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2230 for (j=0;j<n;j++) { 2231 PetscScalar val = vals[j]; 2232 PetscInt k; 2233 for (k=0;k<n_vertices;k++) { 2234 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2235 } 2236 } 2237 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2238 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2239 } 2240 ierr = PetscFree(sums);CHKERRQ(ierr); 2241 ierr = MatDenseRestoreArray(S_VVt,&marr); 2242 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2243 } 2244 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2245 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2246 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2247 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2248 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2249 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2250 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2251 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2252 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2253 } else { 2254 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2255 } 2256 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2257 2258 /* coarse basis functions */ 2259 for (i=0;i<n_vertices;i++) { 2260 PetscScalar *y; 2261 2262 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2263 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2264 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2265 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2266 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2267 y[n_B*i+idx_V_B[i]] = 1.0; 2268 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2269 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2270 2271 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2272 PetscInt j; 2273 2274 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2275 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2279 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2280 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2281 } 2282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2283 } 2284 /* if n_R == 0 the object is not destroyed */ 2285 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2286 } 2287 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2288 2289 if (n_constraints) { 2290 Mat B; 2291 2292 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2293 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2294 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2295 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2296 if (n_vertices) { 2297 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2298 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2299 } else { 2300 Mat S_VCt; 2301 2302 if (lda_rhs != n_R) { 2303 ierr = MatDestroy(&B);CHKERRQ(ierr); 2304 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2305 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2306 } 2307 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2308 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2309 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2310 } 2311 } 2312 ierr = MatDestroy(&B);CHKERRQ(ierr); 2313 /* coarse basis functions */ 2314 for (i=0;i<n_constraints;i++) { 2315 PetscScalar *y; 2316 2317 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2318 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2319 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2320 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2321 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2322 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2323 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2324 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2325 PetscInt j; 2326 2327 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2328 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2329 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2330 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2331 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2332 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2333 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2334 } 2335 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2336 } 2337 } 2338 if (n_constraints) { 2339 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2340 } 2341 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2342 2343 /* coarse matrix entries relative to B_0 */ 2344 if (pcbddc->benign_n) { 2345 Mat B0_B,B0_BPHI; 2346 IS is_dummy; 2347 PetscScalar *data; 2348 PetscInt j; 2349 2350 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2351 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2353 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2354 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2355 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2356 for (j=0;j<pcbddc->benign_n;j++) { 2357 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2358 for (i=0;i<pcbddc->local_primal_size;i++) { 2359 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2360 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2361 } 2362 } 2363 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2364 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2365 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2366 } 2367 2368 /* compute other basis functions for non-symmetric problems */ 2369 if (!pcbddc->symmetric_primal) { 2370 Mat B_V=NULL,B_C=NULL; 2371 PetscScalar *marray; 2372 2373 if (n_constraints) { 2374 Mat S_CCT,C_CRT; 2375 2376 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2377 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2378 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2379 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2380 if (n_vertices) { 2381 Mat S_VCT; 2382 2383 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2384 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2385 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2386 } 2387 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2388 } else { 2389 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2390 } 2391 if (n_vertices && n_R) { 2392 PetscScalar *av,*marray; 2393 const PetscInt *xadj,*adjncy; 2394 PetscInt n; 2395 PetscBool flg_row; 2396 2397 /* B_V = B_V - A_VR^T */ 2398 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2399 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2400 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2401 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2402 for (i=0;i<n;i++) { 2403 PetscInt j; 2404 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2405 } 2406 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2407 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2408 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2409 } 2410 2411 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2412 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2413 for (i=0;i<n_vertices;i++) { 2414 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2415 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2416 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2417 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2418 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2419 } 2420 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2421 if (B_C) { 2422 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2423 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2424 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2425 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2426 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2427 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2428 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2429 } 2430 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2431 } 2432 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2433 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2434 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2435 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2436 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2437 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2438 } 2439 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2440 /* coarse basis functions */ 2441 for (i=0;i<pcbddc->local_primal_size;i++) { 2442 PetscScalar *y; 2443 2444 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2445 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2446 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2447 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2449 if (i<n_vertices) { 2450 y[n_B*i+idx_V_B[i]] = 1.0; 2451 } 2452 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2453 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2454 2455 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2456 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2457 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2458 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2459 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2460 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2461 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2462 } 2463 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2464 } 2465 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2466 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2467 } 2468 /* free memory */ 2469 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2470 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2471 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2472 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2473 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2474 ierr = PetscFree(work);CHKERRQ(ierr); 2475 if (n_vertices) { 2476 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2477 } 2478 if (n_constraints) { 2479 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2480 } 2481 /* Checking coarse_sub_mat and coarse basis functios */ 2482 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2483 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2484 if (pcbddc->dbg_flag) { 2485 Mat coarse_sub_mat; 2486 Mat AUXMAT,TM1,TM2,TM3,TM4; 2487 Mat coarse_phi_D,coarse_phi_B; 2488 Mat coarse_psi_D,coarse_psi_B; 2489 Mat A_II,A_BB,A_IB,A_BI; 2490 Mat C_B,CPHI; 2491 IS is_dummy; 2492 Vec mones; 2493 MatType checkmattype=MATSEQAIJ; 2494 PetscReal real_value; 2495 2496 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2497 Mat A; 2498 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2499 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2500 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2501 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2502 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2503 ierr = MatDestroy(&A);CHKERRQ(ierr); 2504 } else { 2505 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2506 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2507 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2508 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2509 } 2510 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2511 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2512 if (!pcbddc->symmetric_primal) { 2513 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2514 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2515 } 2516 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2517 2518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2521 if (!pcbddc->symmetric_primal) { 2522 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2523 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2524 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2525 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2526 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2527 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2528 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2529 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2530 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2531 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2532 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2533 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2534 } else { 2535 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2536 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2537 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2538 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2539 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2540 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2541 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2542 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2543 } 2544 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2545 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2546 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2547 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2548 if (pcbddc->benign_n) { 2549 Mat B0_B,B0_BPHI; 2550 PetscScalar *data,*data2; 2551 PetscInt j; 2552 2553 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2554 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2555 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2556 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2557 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2558 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2559 for (j=0;j<pcbddc->benign_n;j++) { 2560 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2561 for (i=0;i<pcbddc->local_primal_size;i++) { 2562 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2563 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2564 } 2565 } 2566 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2567 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2568 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2569 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2570 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2571 } 2572 #if 0 2573 { 2574 PetscViewer viewer; 2575 char filename[256]; 2576 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2577 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2578 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2579 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2580 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2581 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2582 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2583 if (save_change) { 2584 Mat phi_B; 2585 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2586 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2587 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2588 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2589 } else { 2590 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2591 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2592 } 2593 if (pcbddc->coarse_phi_D) { 2594 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2595 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2596 } 2597 if (pcbddc->coarse_psi_B) { 2598 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2599 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2600 } 2601 if (pcbddc->coarse_psi_D) { 2602 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2603 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2604 } 2605 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2606 } 2607 #endif 2608 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2609 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2610 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2612 2613 /* check constraints */ 2614 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2615 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2616 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2617 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2618 } else { 2619 PetscScalar *data; 2620 Mat tmat; 2621 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2623 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2624 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2625 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2626 } 2627 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2628 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2629 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2630 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2632 if (!pcbddc->symmetric_primal) { 2633 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2634 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2635 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2636 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2638 } 2639 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2640 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2641 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2642 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2643 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2644 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2645 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2646 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2647 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2648 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2649 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2650 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2651 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2652 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2653 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2654 if (!pcbddc->symmetric_primal) { 2655 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2656 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2657 } 2658 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2659 } 2660 /* get back data */ 2661 *coarse_submat_vals_n = coarse_submat_vals; 2662 PetscFunctionReturn(0); 2663 } 2664 2665 #undef __FUNCT__ 2666 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2667 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2668 { 2669 Mat *work_mat; 2670 IS isrow_s,iscol_s; 2671 PetscBool rsorted,csorted; 2672 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2673 PetscErrorCode ierr; 2674 2675 PetscFunctionBegin; 2676 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2677 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2678 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2679 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2680 2681 if (!rsorted) { 2682 const PetscInt *idxs; 2683 PetscInt *idxs_sorted,i; 2684 2685 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2686 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2687 for (i=0;i<rsize;i++) { 2688 idxs_perm_r[i] = i; 2689 } 2690 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2691 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2692 for (i=0;i<rsize;i++) { 2693 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2694 } 2695 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2696 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2697 } else { 2698 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2699 isrow_s = isrow; 2700 } 2701 2702 if (!csorted) { 2703 if (isrow == iscol) { 2704 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2705 iscol_s = isrow_s; 2706 } else { 2707 const PetscInt *idxs; 2708 PetscInt *idxs_sorted,i; 2709 2710 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2711 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2712 for (i=0;i<csize;i++) { 2713 idxs_perm_c[i] = i; 2714 } 2715 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2716 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2717 for (i=0;i<csize;i++) { 2718 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2719 } 2720 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2721 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2722 } 2723 } else { 2724 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2725 iscol_s = iscol; 2726 } 2727 2728 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2729 2730 if (!rsorted || !csorted) { 2731 Mat new_mat; 2732 IS is_perm_r,is_perm_c; 2733 2734 if (!rsorted) { 2735 PetscInt *idxs_r,i; 2736 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2737 for (i=0;i<rsize;i++) { 2738 idxs_r[idxs_perm_r[i]] = i; 2739 } 2740 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2741 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2742 } else { 2743 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2744 } 2745 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2746 2747 if (!csorted) { 2748 if (isrow_s == iscol_s) { 2749 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2750 is_perm_c = is_perm_r; 2751 } else { 2752 PetscInt *idxs_c,i; 2753 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2754 for (i=0;i<csize;i++) { 2755 idxs_c[idxs_perm_c[i]] = i; 2756 } 2757 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2758 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2759 } 2760 } else { 2761 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2762 } 2763 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2764 2765 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2766 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2767 work_mat[0] = new_mat; 2768 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2769 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2770 } 2771 2772 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2773 *B = work_mat[0]; 2774 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2775 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2776 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2782 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2783 { 2784 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2785 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2786 Mat new_mat; 2787 IS is_local,is_global; 2788 PetscInt local_size; 2789 PetscBool isseqaij; 2790 PetscErrorCode ierr; 2791 2792 PetscFunctionBegin; 2793 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2794 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2795 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2796 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2797 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2798 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2799 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2800 2801 /* check */ 2802 if (pcbddc->dbg_flag) { 2803 Vec x,x_change; 2804 PetscReal error; 2805 2806 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2807 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2808 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2809 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2810 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2811 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2812 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2813 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2814 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2815 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2817 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2818 ierr = VecDestroy(&x);CHKERRQ(ierr); 2819 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2820 } 2821 2822 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2823 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2824 if (isseqaij) { 2825 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2826 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2827 } else { 2828 Mat work_mat; 2829 2830 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2831 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2832 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2833 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2834 } 2835 if (matis->A->symmetric_set) { 2836 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2837 #if !defined(PETSC_USE_COMPLEX) 2838 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2839 #endif 2840 } 2841 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2842 PetscFunctionReturn(0); 2843 } 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2847 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2848 { 2849 PC_IS* pcis = (PC_IS*)(pc->data); 2850 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2851 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2852 PetscInt *idx_R_local=NULL; 2853 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2854 PetscInt vbs,bs; 2855 PetscBT bitmask=NULL; 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 /* 2860 No need to setup local scatters if 2861 - primal space is unchanged 2862 AND 2863 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2864 AND 2865 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2866 */ 2867 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2868 PetscFunctionReturn(0); 2869 } 2870 /* destroy old objects */ 2871 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2872 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2873 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2874 /* Set Non-overlapping dimensions */ 2875 n_B = pcis->n_B; 2876 n_D = pcis->n - n_B; 2877 n_vertices = pcbddc->n_vertices; 2878 2879 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2880 2881 /* create auxiliary bitmask and allocate workspace */ 2882 if (!sub_schurs || !sub_schurs->reuse_solver) { 2883 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2884 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2885 for (i=0;i<n_vertices;i++) { 2886 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2887 } 2888 2889 for (i=0, n_R=0; i<pcis->n; i++) { 2890 if (!PetscBTLookup(bitmask,i)) { 2891 idx_R_local[n_R++] = i; 2892 } 2893 } 2894 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2895 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2896 2897 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2898 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2899 } 2900 2901 /* Block code */ 2902 vbs = 1; 2903 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2904 if (bs>1 && !(n_vertices%bs)) { 2905 PetscBool is_blocked = PETSC_TRUE; 2906 PetscInt *vary; 2907 if (!sub_schurs || !sub_schurs->reuse_solver) { 2908 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2909 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2910 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2911 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2912 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2913 for (i=0; i<pcis->n/bs; i++) { 2914 if (vary[i]!=0 && vary[i]!=bs) { 2915 is_blocked = PETSC_FALSE; 2916 break; 2917 } 2918 } 2919 ierr = PetscFree(vary);CHKERRQ(ierr); 2920 } else { 2921 /* Verify directly the R set */ 2922 for (i=0; i<n_R/bs; i++) { 2923 PetscInt j,node=idx_R_local[bs*i]; 2924 for (j=1; j<bs; j++) { 2925 if (node != idx_R_local[bs*i+j]-j) { 2926 is_blocked = PETSC_FALSE; 2927 break; 2928 } 2929 } 2930 } 2931 } 2932 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2933 vbs = bs; 2934 for (i=0;i<n_R/vbs;i++) { 2935 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2936 } 2937 } 2938 } 2939 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2940 if (sub_schurs && sub_schurs->reuse_solver) { 2941 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2942 2943 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2944 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2945 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2946 reuse_solver->is_R = pcbddc->is_R_local; 2947 } else { 2948 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2949 } 2950 2951 /* print some info if requested */ 2952 if (pcbddc->dbg_flag) { 2953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2955 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2960 } 2961 2962 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2963 if (!sub_schurs || !sub_schurs->reuse_solver) { 2964 IS is_aux1,is_aux2; 2965 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2966 2967 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2968 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2969 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2970 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2971 for (i=0; i<n_D; i++) { 2972 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2973 } 2974 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2975 for (i=0, j=0; i<n_R; i++) { 2976 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2977 aux_array1[j++] = i; 2978 } 2979 } 2980 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2981 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2982 for (i=0, j=0; i<n_B; i++) { 2983 if (!PetscBTLookup(bitmask,is_indices[i])) { 2984 aux_array2[j++] = i; 2985 } 2986 } 2987 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2988 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2989 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2990 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2991 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2992 2993 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2994 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2995 for (i=0, j=0; i<n_R; i++) { 2996 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2997 aux_array1[j++] = i; 2998 } 2999 } 3000 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3001 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3002 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3003 } 3004 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3005 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3006 } else { 3007 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3008 IS tis; 3009 PetscInt schur_size; 3010 3011 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3012 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3013 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3014 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3015 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3016 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3017 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3018 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3019 } 3020 } 3021 PetscFunctionReturn(0); 3022 } 3023 3024 3025 #undef __FUNCT__ 3026 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3027 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3028 { 3029 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3030 PC_IS *pcis = (PC_IS*)pc->data; 3031 PC pc_temp; 3032 Mat A_RR; 3033 MatReuse reuse; 3034 PetscScalar m_one = -1.0; 3035 PetscReal value; 3036 PetscInt n_D,n_R; 3037 PetscBool check_corr[2],issbaij; 3038 PetscErrorCode ierr; 3039 /* prefixes stuff */ 3040 char dir_prefix[256],neu_prefix[256],str_level[16]; 3041 size_t len; 3042 3043 PetscFunctionBegin; 3044 3045 /* compute prefixes */ 3046 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3047 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3048 if (!pcbddc->current_level) { 3049 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3050 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3051 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3052 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3053 } else { 3054 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3055 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3056 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3057 len -= 15; /* remove "pc_bddc_coarse_" */ 3058 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3059 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3060 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3061 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3062 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3063 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3064 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3065 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3066 } 3067 3068 /* DIRICHLET PROBLEM */ 3069 if (dirichlet) { 3070 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3071 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3072 if (!sub_schurs || !sub_schurs->reuse_solver) { 3073 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3074 } 3075 if (pcbddc->dbg_flag) { 3076 Mat A_IIn; 3077 3078 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3079 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3080 pcis->A_II = A_IIn; 3081 } 3082 } 3083 if (pcbddc->local_mat->symmetric_set) { 3084 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3085 } 3086 /* Matrix for Dirichlet problem is pcis->A_II */ 3087 n_D = pcis->n - pcis->n_B; 3088 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3089 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3090 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3091 /* default */ 3092 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3093 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3094 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3095 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3096 if (issbaij) { 3097 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3098 } else { 3099 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3100 } 3101 /* Allow user's customization */ 3102 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3103 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3104 } 3105 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3106 if (sub_schurs && sub_schurs->reuse_solver) { 3107 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3108 3109 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3110 } 3111 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3112 if (!n_D) { 3113 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3114 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3115 } 3116 /* Set Up KSP for Dirichlet problem of BDDC */ 3117 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3118 /* set ksp_D into pcis data */ 3119 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3120 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3121 pcis->ksp_D = pcbddc->ksp_D; 3122 } 3123 3124 /* NEUMANN PROBLEM */ 3125 A_RR = 0; 3126 if (neumann) { 3127 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3128 PetscInt ibs,mbs; 3129 PetscBool issbaij; 3130 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3131 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3132 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3133 if (pcbddc->ksp_R) { /* already created ksp */ 3134 PetscInt nn_R; 3135 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3136 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3137 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3138 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3139 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3140 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3141 reuse = MAT_INITIAL_MATRIX; 3142 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3143 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3144 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3145 reuse = MAT_INITIAL_MATRIX; 3146 } else { /* safe to reuse the matrix */ 3147 reuse = MAT_REUSE_MATRIX; 3148 } 3149 } 3150 /* last check */ 3151 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3152 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3153 reuse = MAT_INITIAL_MATRIX; 3154 } 3155 } else { /* first time, so we need to create the matrix */ 3156 reuse = MAT_INITIAL_MATRIX; 3157 } 3158 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3159 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3160 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3161 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3162 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3163 if (matis->A == pcbddc->local_mat) { 3164 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3165 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3166 } else { 3167 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3168 } 3169 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3170 if (matis->A == pcbddc->local_mat) { 3171 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3172 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3173 } else { 3174 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3175 } 3176 } 3177 /* extract A_RR */ 3178 if (sub_schurs && sub_schurs->reuse_solver) { 3179 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3180 3181 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3182 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3183 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3184 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3185 } else { 3186 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3187 } 3188 } else { 3189 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3190 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3191 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3192 } 3193 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3194 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3195 } 3196 if (pcbddc->local_mat->symmetric_set) { 3197 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3198 } 3199 if (!pcbddc->ksp_R) { /* create object if not present */ 3200 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3201 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3202 /* default */ 3203 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3204 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3205 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3206 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3207 if (issbaij) { 3208 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3209 } else { 3210 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3211 } 3212 /* Allow user's customization */ 3213 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3214 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3215 } 3216 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3217 if (!n_R) { 3218 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3219 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3220 } 3221 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3222 /* Reuse solver if it is present */ 3223 if (sub_schurs && sub_schurs->reuse_solver) { 3224 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3225 3226 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3227 } 3228 /* Set Up KSP for Neumann problem of BDDC */ 3229 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3230 } 3231 3232 if (pcbddc->dbg_flag) { 3233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3234 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3236 } 3237 3238 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3239 check_corr[0] = check_corr[1] = PETSC_FALSE; 3240 if (pcbddc->NullSpace_corr[0]) { 3241 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3242 } 3243 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3244 check_corr[0] = PETSC_TRUE; 3245 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3246 } 3247 if (neumann && pcbddc->NullSpace_corr[2]) { 3248 check_corr[1] = PETSC_TRUE; 3249 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3250 } 3251 3252 /* check Dirichlet and Neumann solvers */ 3253 if (pcbddc->dbg_flag) { 3254 if (dirichlet) { /* Dirichlet */ 3255 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3256 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3257 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3258 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3259 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3261 if (check_corr[0]) { 3262 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3263 } 3264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3265 } 3266 if (neumann) { /* Neumann */ 3267 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3268 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3269 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3270 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3271 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3273 if (check_corr[1]) { 3274 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3275 } 3276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3277 } 3278 } 3279 /* free Neumann problem's matrix */ 3280 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3281 PetscFunctionReturn(0); 3282 } 3283 3284 #undef __FUNCT__ 3285 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3286 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3287 { 3288 PetscErrorCode ierr; 3289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3290 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3291 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3292 3293 PetscFunctionBegin; 3294 if (!reuse_solver) { 3295 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3296 } 3297 if (!pcbddc->switch_static) { 3298 if (applytranspose && pcbddc->local_auxmat1) { 3299 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3300 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3301 } 3302 if (!reuse_solver) { 3303 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3304 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3305 } else { 3306 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3307 3308 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3309 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3310 } 3311 } else { 3312 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3313 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3314 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3315 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3316 if (applytranspose && pcbddc->local_auxmat1) { 3317 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3318 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3319 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3320 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3321 } 3322 } 3323 if (!reuse_solver || pcbddc->switch_static) { 3324 if (applytranspose) { 3325 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3326 } else { 3327 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3328 } 3329 } else { 3330 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3331 3332 if (applytranspose) { 3333 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3334 } else { 3335 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3336 } 3337 } 3338 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3339 if (!pcbddc->switch_static) { 3340 if (!reuse_solver) { 3341 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3343 } else { 3344 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3345 3346 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3347 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3348 } 3349 if (!applytranspose && pcbddc->local_auxmat1) { 3350 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3351 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3352 } 3353 } else { 3354 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3355 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3356 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3357 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 if (!applytranspose && pcbddc->local_auxmat1) { 3359 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3360 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3361 } 3362 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3363 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3364 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3365 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3366 } 3367 PetscFunctionReturn(0); 3368 } 3369 3370 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3371 #undef __FUNCT__ 3372 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3373 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3374 { 3375 PetscErrorCode ierr; 3376 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3377 PC_IS* pcis = (PC_IS*) (pc->data); 3378 const PetscScalar zero = 0.0; 3379 3380 PetscFunctionBegin; 3381 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3382 if (!pcbddc->benign_apply_coarse_only) { 3383 if (applytranspose) { 3384 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3385 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3386 } else { 3387 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3388 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3389 } 3390 } else { 3391 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3392 } 3393 3394 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3395 if (pcbddc->benign_n) { 3396 PetscScalar *array; 3397 PetscInt j; 3398 3399 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3400 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3401 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3402 } 3403 3404 /* start communications from local primal nodes to rhs of coarse solver */ 3405 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3406 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 3409 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3410 if (pcbddc->coarse_ksp) { 3411 Mat coarse_mat; 3412 Vec rhs,sol; 3413 MatNullSpace nullsp; 3414 PetscBool isbddc = PETSC_FALSE; 3415 3416 if (pcbddc->benign_have_null) { 3417 PC coarse_pc; 3418 3419 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3420 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3421 /* we need to propagate to coarser levels the need for a possible benign correction */ 3422 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3423 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3424 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3425 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3426 } 3427 } 3428 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3429 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3430 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3431 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3432 if (nullsp) { 3433 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3434 } 3435 if (applytranspose) { 3436 if (pcbddc->benign_apply_coarse_only) { 3437 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3438 } else { 3439 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3440 } 3441 } else { 3442 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3443 PC coarse_pc; 3444 3445 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3446 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3447 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3448 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3449 } else { 3450 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3451 } 3452 } 3453 /* we don't need the benign correction at coarser levels anymore */ 3454 if (pcbddc->benign_have_null && isbddc) { 3455 PC coarse_pc; 3456 PC_BDDC* coarsepcbddc; 3457 3458 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3459 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3460 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3461 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3462 } 3463 if (nullsp) { 3464 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3465 } 3466 } 3467 3468 /* Local solution on R nodes */ 3469 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3470 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3471 } 3472 /* communications from coarse sol to local primal nodes */ 3473 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3474 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3475 3476 /* Sum contributions from the two levels */ 3477 if (!pcbddc->benign_apply_coarse_only) { 3478 if (applytranspose) { 3479 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3480 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3481 } else { 3482 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3483 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3484 } 3485 /* store p0 */ 3486 if (pcbddc->benign_n) { 3487 PetscScalar *array; 3488 PetscInt j; 3489 3490 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3491 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3492 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3493 } 3494 } else { /* expand the coarse solution */ 3495 if (applytranspose) { 3496 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3497 } else { 3498 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3499 } 3500 } 3501 PetscFunctionReturn(0); 3502 } 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3506 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3507 { 3508 PetscErrorCode ierr; 3509 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3510 PetscScalar *array; 3511 Vec from,to; 3512 3513 PetscFunctionBegin; 3514 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3515 from = pcbddc->coarse_vec; 3516 to = pcbddc->vec1_P; 3517 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3518 Vec tvec; 3519 3520 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3521 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3522 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3523 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3524 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3525 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3526 } 3527 } else { /* from local to global -> put data in coarse right hand side */ 3528 from = pcbddc->vec1_P; 3529 to = pcbddc->coarse_vec; 3530 } 3531 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3537 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3538 { 3539 PetscErrorCode ierr; 3540 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3541 PetscScalar *array; 3542 Vec from,to; 3543 3544 PetscFunctionBegin; 3545 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3546 from = pcbddc->coarse_vec; 3547 to = pcbddc->vec1_P; 3548 } else { /* from local to global -> put data in coarse right hand side */ 3549 from = pcbddc->vec1_P; 3550 to = pcbddc->coarse_vec; 3551 } 3552 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3553 if (smode == SCATTER_FORWARD) { 3554 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3555 Vec tvec; 3556 3557 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3558 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3559 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3560 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3561 } 3562 } else { 3563 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3564 ierr = VecResetArray(from);CHKERRQ(ierr); 3565 } 3566 } 3567 PetscFunctionReturn(0); 3568 } 3569 3570 /* uncomment for testing purposes */ 3571 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3572 #undef __FUNCT__ 3573 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3574 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3575 { 3576 PetscErrorCode ierr; 3577 PC_IS* pcis = (PC_IS*)(pc->data); 3578 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3579 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3580 /* one and zero */ 3581 PetscScalar one=1.0,zero=0.0; 3582 /* space to store constraints and their local indices */ 3583 PetscScalar *constraints_data; 3584 PetscInt *constraints_idxs,*constraints_idxs_B; 3585 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3586 PetscInt *constraints_n; 3587 /* iterators */ 3588 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3589 /* BLAS integers */ 3590 PetscBLASInt lwork,lierr; 3591 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3592 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3593 /* reuse */ 3594 PetscInt olocal_primal_size,olocal_primal_size_cc; 3595 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3596 /* change of basis */ 3597 PetscBool qr_needed; 3598 PetscBT change_basis,qr_needed_idx; 3599 /* auxiliary stuff */ 3600 PetscInt *nnz,*is_indices; 3601 PetscInt ncc; 3602 /* some quantities */ 3603 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3604 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3605 3606 PetscFunctionBegin; 3607 /* Destroy Mat objects computed previously */ 3608 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3609 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3610 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3611 /* save info on constraints from previous setup (if any) */ 3612 olocal_primal_size = pcbddc->local_primal_size; 3613 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3614 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3615 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3616 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3617 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3618 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3619 3620 if (!pcbddc->adaptive_selection) { 3621 IS ISForVertices,*ISForFaces,*ISForEdges; 3622 MatNullSpace nearnullsp; 3623 const Vec *nearnullvecs; 3624 Vec *localnearnullsp; 3625 PetscScalar *array; 3626 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3627 PetscBool nnsp_has_cnst; 3628 /* LAPACK working arrays for SVD or POD */ 3629 PetscBool skip_lapack,boolforchange; 3630 PetscScalar *work; 3631 PetscReal *singular_vals; 3632 #if defined(PETSC_USE_COMPLEX) 3633 PetscReal *rwork; 3634 #endif 3635 #if defined(PETSC_MISSING_LAPACK_GESVD) 3636 PetscScalar *temp_basis,*correlation_mat; 3637 #else 3638 PetscBLASInt dummy_int=1; 3639 PetscScalar dummy_scalar=1.; 3640 #endif 3641 3642 /* Get index sets for faces, edges and vertices from graph */ 3643 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3644 /* print some info */ 3645 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3646 PetscInt nv; 3647 3648 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3649 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3650 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3654 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3655 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3656 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3657 } 3658 3659 /* free unneeded index sets */ 3660 if (!pcbddc->use_vertices) { 3661 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3662 } 3663 if (!pcbddc->use_edges) { 3664 for (i=0;i<n_ISForEdges;i++) { 3665 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3666 } 3667 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3668 n_ISForEdges = 0; 3669 } 3670 if (!pcbddc->use_faces) { 3671 for (i=0;i<n_ISForFaces;i++) { 3672 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3673 } 3674 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3675 n_ISForFaces = 0; 3676 } 3677 3678 /* check if near null space is attached to global mat */ 3679 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3680 if (nearnullsp) { 3681 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3682 /* remove any stored info */ 3683 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3684 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3685 /* store information for BDDC solver reuse */ 3686 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3687 pcbddc->onearnullspace = nearnullsp; 3688 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3689 for (i=0;i<nnsp_size;i++) { 3690 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3691 } 3692 } else { /* if near null space is not provided BDDC uses constants by default */ 3693 nnsp_size = 0; 3694 nnsp_has_cnst = PETSC_TRUE; 3695 } 3696 /* get max number of constraints on a single cc */ 3697 max_constraints = nnsp_size; 3698 if (nnsp_has_cnst) max_constraints++; 3699 3700 /* 3701 Evaluate maximum storage size needed by the procedure 3702 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3703 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3704 There can be multiple constraints per connected component 3705 */ 3706 n_vertices = 0; 3707 if (ISForVertices) { 3708 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3709 } 3710 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3711 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3712 3713 total_counts = n_ISForFaces+n_ISForEdges; 3714 total_counts *= max_constraints; 3715 total_counts += n_vertices; 3716 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3717 3718 total_counts = 0; 3719 max_size_of_constraint = 0; 3720 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3721 IS used_is; 3722 if (i<n_ISForEdges) { 3723 used_is = ISForEdges[i]; 3724 } else { 3725 used_is = ISForFaces[i-n_ISForEdges]; 3726 } 3727 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3728 total_counts += j; 3729 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3730 } 3731 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3732 3733 /* get local part of global near null space vectors */ 3734 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3735 for (k=0;k<nnsp_size;k++) { 3736 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3737 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3738 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3739 } 3740 3741 /* whether or not to skip lapack calls */ 3742 skip_lapack = PETSC_TRUE; 3743 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3744 3745 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3746 if (!skip_lapack) { 3747 PetscScalar temp_work; 3748 3749 #if defined(PETSC_MISSING_LAPACK_GESVD) 3750 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3751 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3752 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3753 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3754 #if defined(PETSC_USE_COMPLEX) 3755 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3756 #endif 3757 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3758 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3759 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3760 lwork = -1; 3761 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3762 #if !defined(PETSC_USE_COMPLEX) 3763 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3764 #else 3765 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3766 #endif 3767 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3768 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3769 #else /* on missing GESVD */ 3770 /* SVD */ 3771 PetscInt max_n,min_n; 3772 max_n = max_size_of_constraint; 3773 min_n = max_constraints; 3774 if (max_size_of_constraint < max_constraints) { 3775 min_n = max_size_of_constraint; 3776 max_n = max_constraints; 3777 } 3778 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3779 #if defined(PETSC_USE_COMPLEX) 3780 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3781 #endif 3782 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3783 lwork = -1; 3784 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3785 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3786 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3787 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3788 #if !defined(PETSC_USE_COMPLEX) 3789 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3790 #else 3791 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3792 #endif 3793 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3794 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3795 #endif /* on missing GESVD */ 3796 /* Allocate optimal workspace */ 3797 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3798 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3799 } 3800 /* Now we can loop on constraining sets */ 3801 total_counts = 0; 3802 constraints_idxs_ptr[0] = 0; 3803 constraints_data_ptr[0] = 0; 3804 /* vertices */ 3805 if (n_vertices) { 3806 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3807 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3808 for (i=0;i<n_vertices;i++) { 3809 constraints_n[total_counts] = 1; 3810 constraints_data[total_counts] = 1.0; 3811 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3812 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3813 total_counts++; 3814 } 3815 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3816 n_vertices = total_counts; 3817 } 3818 3819 /* edges and faces */ 3820 total_counts_cc = total_counts; 3821 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3822 IS used_is; 3823 PetscBool idxs_copied = PETSC_FALSE; 3824 3825 if (ncc<n_ISForEdges) { 3826 used_is = ISForEdges[ncc]; 3827 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3828 } else { 3829 used_is = ISForFaces[ncc-n_ISForEdges]; 3830 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3831 } 3832 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3833 3834 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3835 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3836 /* change of basis should not be performed on local periodic nodes */ 3837 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3838 if (nnsp_has_cnst) { 3839 PetscScalar quad_value; 3840 3841 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3842 idxs_copied = PETSC_TRUE; 3843 3844 if (!pcbddc->use_nnsp_true) { 3845 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3846 } else { 3847 quad_value = 1.0; 3848 } 3849 for (j=0;j<size_of_constraint;j++) { 3850 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3851 } 3852 temp_constraints++; 3853 total_counts++; 3854 } 3855 for (k=0;k<nnsp_size;k++) { 3856 PetscReal real_value; 3857 PetscScalar *ptr_to_data; 3858 3859 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3860 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3861 for (j=0;j<size_of_constraint;j++) { 3862 ptr_to_data[j] = array[is_indices[j]]; 3863 } 3864 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3865 /* check if array is null on the connected component */ 3866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3867 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3868 if (real_value > 0.0) { /* keep indices and values */ 3869 temp_constraints++; 3870 total_counts++; 3871 if (!idxs_copied) { 3872 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3873 idxs_copied = PETSC_TRUE; 3874 } 3875 } 3876 } 3877 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3878 valid_constraints = temp_constraints; 3879 if (!pcbddc->use_nnsp_true && temp_constraints) { 3880 if (temp_constraints == 1) { /* just normalize the constraint */ 3881 PetscScalar norm,*ptr_to_data; 3882 3883 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3885 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3886 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3887 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3888 } else { /* perform SVD */ 3889 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3890 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3891 3892 #if defined(PETSC_MISSING_LAPACK_GESVD) 3893 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3894 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3895 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3896 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3897 from that computed using LAPACKgesvd 3898 -> This is due to a different computation of eigenvectors in LAPACKheev 3899 -> The quality of the POD-computed basis will be the same */ 3900 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3901 /* Store upper triangular part of correlation matrix */ 3902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3904 for (j=0;j<temp_constraints;j++) { 3905 for (k=0;k<j+1;k++) { 3906 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3907 } 3908 } 3909 /* compute eigenvalues and eigenvectors of correlation matrix */ 3910 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3911 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3912 #if !defined(PETSC_USE_COMPLEX) 3913 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3914 #else 3915 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3916 #endif 3917 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3918 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3919 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3920 j = 0; 3921 while (j < temp_constraints && singular_vals[j] < tol) j++; 3922 total_counts = total_counts-j; 3923 valid_constraints = temp_constraints-j; 3924 /* scale and copy POD basis into used quadrature memory */ 3925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3926 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3927 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3928 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3929 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3930 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3931 if (j<temp_constraints) { 3932 PetscInt ii; 3933 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3934 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3935 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3937 for (k=0;k<temp_constraints-j;k++) { 3938 for (ii=0;ii<size_of_constraint;ii++) { 3939 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3940 } 3941 } 3942 } 3943 #else /* on missing GESVD */ 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3948 #if !defined(PETSC_USE_COMPLEX) 3949 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3950 #else 3951 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3952 #endif 3953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3954 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3955 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3956 k = temp_constraints; 3957 if (k > size_of_constraint) k = size_of_constraint; 3958 j = 0; 3959 while (j < k && singular_vals[k-j-1] < tol) j++; 3960 valid_constraints = k-j; 3961 total_counts = total_counts-temp_constraints+valid_constraints; 3962 #endif /* on missing GESVD */ 3963 } 3964 } 3965 /* update pointers information */ 3966 if (valid_constraints) { 3967 constraints_n[total_counts_cc] = valid_constraints; 3968 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3969 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3970 /* set change_of_basis flag */ 3971 if (boolforchange) { 3972 PetscBTSet(change_basis,total_counts_cc); 3973 } 3974 total_counts_cc++; 3975 } 3976 } 3977 /* free workspace */ 3978 if (!skip_lapack) { 3979 ierr = PetscFree(work);CHKERRQ(ierr); 3980 #if defined(PETSC_USE_COMPLEX) 3981 ierr = PetscFree(rwork);CHKERRQ(ierr); 3982 #endif 3983 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3984 #if defined(PETSC_MISSING_LAPACK_GESVD) 3985 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3986 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3987 #endif 3988 } 3989 for (k=0;k<nnsp_size;k++) { 3990 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3991 } 3992 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3993 /* free index sets of faces, edges and vertices */ 3994 for (i=0;i<n_ISForFaces;i++) { 3995 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3996 } 3997 if (n_ISForFaces) { 3998 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3999 } 4000 for (i=0;i<n_ISForEdges;i++) { 4001 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4002 } 4003 if (n_ISForEdges) { 4004 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4005 } 4006 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4007 } else { 4008 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4009 4010 total_counts = 0; 4011 n_vertices = 0; 4012 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4013 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4014 } 4015 max_constraints = 0; 4016 total_counts_cc = 0; 4017 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4018 total_counts += pcbddc->adaptive_constraints_n[i]; 4019 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4020 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4021 } 4022 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4023 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4024 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4025 constraints_data = pcbddc->adaptive_constraints_data; 4026 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4027 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4028 total_counts_cc = 0; 4029 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4030 if (pcbddc->adaptive_constraints_n[i]) { 4031 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4032 } 4033 } 4034 #if 0 4035 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4036 for (i=0;i<total_counts_cc;i++) { 4037 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4038 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4039 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4040 printf(" %d",constraints_idxs[j]); 4041 } 4042 printf("\n"); 4043 printf("number of cc: %d\n",constraints_n[i]); 4044 } 4045 for (i=0;i<n_vertices;i++) { 4046 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4047 } 4048 for (i=0;i<sub_schurs->n_subs;i++) { 4049 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4050 } 4051 #endif 4052 4053 max_size_of_constraint = 0; 4054 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4055 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4056 /* Change of basis */ 4057 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4058 if (pcbddc->use_change_of_basis) { 4059 for (i=0;i<sub_schurs->n_subs;i++) { 4060 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4061 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4062 } 4063 } 4064 } 4065 } 4066 pcbddc->local_primal_size = total_counts; 4067 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4068 4069 /* map constraints_idxs in boundary numbering */ 4070 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4071 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4072 4073 /* Create constraint matrix */ 4074 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4075 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4076 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4077 4078 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4079 /* determine if a QR strategy is needed for change of basis */ 4080 qr_needed = PETSC_FALSE; 4081 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4082 total_primal_vertices=0; 4083 pcbddc->local_primal_size_cc = 0; 4084 for (i=0;i<total_counts_cc;i++) { 4085 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4086 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4087 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4088 pcbddc->local_primal_size_cc += 1; 4089 } else if (PetscBTLookup(change_basis,i)) { 4090 for (k=0;k<constraints_n[i];k++) { 4091 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4092 } 4093 pcbddc->local_primal_size_cc += constraints_n[i]; 4094 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4095 PetscBTSet(qr_needed_idx,i); 4096 qr_needed = PETSC_TRUE; 4097 } 4098 } else { 4099 pcbddc->local_primal_size_cc += 1; 4100 } 4101 } 4102 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4103 pcbddc->n_vertices = total_primal_vertices; 4104 /* permute indices in order to have a sorted set of vertices */ 4105 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4106 4107 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4108 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4109 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4110 4111 /* nonzero structure of constraint matrix */ 4112 /* and get reference dof for local constraints */ 4113 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4114 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4115 4116 j = total_primal_vertices; 4117 total_counts = total_primal_vertices; 4118 cum = total_primal_vertices; 4119 for (i=n_vertices;i<total_counts_cc;i++) { 4120 if (!PetscBTLookup(change_basis,i)) { 4121 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4122 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4123 cum++; 4124 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4125 for (k=0;k<constraints_n[i];k++) { 4126 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4127 nnz[j+k] = size_of_constraint; 4128 } 4129 j += constraints_n[i]; 4130 } 4131 } 4132 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4133 ierr = PetscFree(nnz);CHKERRQ(ierr); 4134 4135 /* set values in constraint matrix */ 4136 for (i=0;i<total_primal_vertices;i++) { 4137 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4138 } 4139 total_counts = total_primal_vertices; 4140 for (i=n_vertices;i<total_counts_cc;i++) { 4141 if (!PetscBTLookup(change_basis,i)) { 4142 PetscInt *cols; 4143 4144 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4145 cols = constraints_idxs+constraints_idxs_ptr[i]; 4146 for (k=0;k<constraints_n[i];k++) { 4147 PetscInt row = total_counts+k; 4148 PetscScalar *vals; 4149 4150 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4151 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4152 } 4153 total_counts += constraints_n[i]; 4154 } 4155 } 4156 /* assembling */ 4157 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4158 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4159 4160 /* 4161 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4162 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4163 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4164 */ 4165 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4166 if (pcbddc->use_change_of_basis) { 4167 /* dual and primal dofs on a single cc */ 4168 PetscInt dual_dofs,primal_dofs; 4169 /* working stuff for GEQRF */ 4170 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4171 PetscBLASInt lqr_work; 4172 /* working stuff for UNGQR */ 4173 PetscScalar *gqr_work,lgqr_work_t; 4174 PetscBLASInt lgqr_work; 4175 /* working stuff for TRTRS */ 4176 PetscScalar *trs_rhs; 4177 PetscBLASInt Blas_NRHS; 4178 /* pointers for values insertion into change of basis matrix */ 4179 PetscInt *start_rows,*start_cols; 4180 PetscScalar *start_vals; 4181 /* working stuff for values insertion */ 4182 PetscBT is_primal; 4183 PetscInt *aux_primal_numbering_B; 4184 /* matrix sizes */ 4185 PetscInt global_size,local_size; 4186 /* temporary change of basis */ 4187 Mat localChangeOfBasisMatrix; 4188 /* extra space for debugging */ 4189 PetscScalar *dbg_work; 4190 4191 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4192 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4193 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4194 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4195 /* nonzeros for local mat */ 4196 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4197 if (!pcbddc->benign_change || pcbddc->fake_change) { 4198 for (i=0;i<pcis->n;i++) nnz[i]=1; 4199 } else { 4200 const PetscInt *ii; 4201 PetscInt n; 4202 PetscBool flg_row; 4203 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4204 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4205 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4206 } 4207 for (i=n_vertices;i<total_counts_cc;i++) { 4208 if (PetscBTLookup(change_basis,i)) { 4209 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4210 if (PetscBTLookup(qr_needed_idx,i)) { 4211 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4212 } else { 4213 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4214 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4215 } 4216 } 4217 } 4218 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4219 ierr = PetscFree(nnz);CHKERRQ(ierr); 4220 /* Set interior change in the matrix */ 4221 if (!pcbddc->benign_change || pcbddc->fake_change) { 4222 for (i=0;i<pcis->n;i++) { 4223 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4224 } 4225 } else { 4226 const PetscInt *ii,*jj; 4227 PetscScalar *aa; 4228 PetscInt n; 4229 PetscBool flg_row; 4230 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4231 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4232 for (i=0;i<n;i++) { 4233 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4234 } 4235 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4236 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4237 } 4238 4239 if (pcbddc->dbg_flag) { 4240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4242 } 4243 4244 4245 /* Now we loop on the constraints which need a change of basis */ 4246 /* 4247 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4248 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4249 4250 Basic blocks of change of basis matrix T computed by 4251 4252 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4253 4254 | 1 0 ... 0 s_1/S | 4255 | 0 1 ... 0 s_2/S | 4256 | ... | 4257 | 0 ... 1 s_{n-1}/S | 4258 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4259 4260 with S = \sum_{i=1}^n s_i^2 4261 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4262 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4263 4264 - QR decomposition of constraints otherwise 4265 */ 4266 if (qr_needed) { 4267 /* space to store Q */ 4268 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4269 /* first we issue queries for optimal work */ 4270 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4271 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4272 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4273 lqr_work = -1; 4274 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4275 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4276 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4277 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4278 lgqr_work = -1; 4279 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4280 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4281 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4282 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4283 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4284 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4285 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4286 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4287 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4288 /* array to store scaling factors for reflectors */ 4289 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4290 /* array to store rhs and solution of triangular solver */ 4291 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4292 /* allocating workspace for check */ 4293 if (pcbddc->dbg_flag) { 4294 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4295 } 4296 } 4297 /* array to store whether a node is primal or not */ 4298 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4299 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4300 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4301 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4302 for (i=0;i<total_primal_vertices;i++) { 4303 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4304 } 4305 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4306 4307 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4308 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4309 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4310 if (PetscBTLookup(change_basis,total_counts)) { 4311 /* get constraint info */ 4312 primal_dofs = constraints_n[total_counts]; 4313 dual_dofs = size_of_constraint-primal_dofs; 4314 4315 if (pcbddc->dbg_flag) { 4316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4317 } 4318 4319 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4320 4321 /* copy quadrature constraints for change of basis check */ 4322 if (pcbddc->dbg_flag) { 4323 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4324 } 4325 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4326 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4327 4328 /* compute QR decomposition of constraints */ 4329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4330 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4332 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4333 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4334 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4335 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4336 4337 /* explictly compute R^-T */ 4338 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4339 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4340 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4341 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4342 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4343 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4344 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4345 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4346 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4347 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4348 4349 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4350 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4351 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4352 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4353 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4354 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4355 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4357 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4358 4359 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4360 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4361 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4362 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4363 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4364 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4365 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4366 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4367 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4368 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4369 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4370 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4371 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4372 4373 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4374 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4375 /* insert cols for primal dofs */ 4376 for (j=0;j<primal_dofs;j++) { 4377 start_vals = &qr_basis[j*size_of_constraint]; 4378 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4379 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4380 } 4381 /* insert cols for dual dofs */ 4382 for (j=0,k=0;j<dual_dofs;k++) { 4383 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4384 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4385 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4386 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4387 j++; 4388 } 4389 } 4390 4391 /* check change of basis */ 4392 if (pcbddc->dbg_flag) { 4393 PetscInt ii,jj; 4394 PetscBool valid_qr=PETSC_TRUE; 4395 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4397 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4398 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4399 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4400 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4401 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4402 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4403 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4404 for (jj=0;jj<size_of_constraint;jj++) { 4405 for (ii=0;ii<primal_dofs;ii++) { 4406 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4407 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4408 } 4409 } 4410 if (!valid_qr) { 4411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4412 for (jj=0;jj<size_of_constraint;jj++) { 4413 for (ii=0;ii<primal_dofs;ii++) { 4414 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4415 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4416 } 4417 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4418 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4419 } 4420 } 4421 } 4422 } else { 4423 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4424 } 4425 } 4426 } else { /* simple transformation block */ 4427 PetscInt row,col; 4428 PetscScalar val,norm; 4429 4430 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4431 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4432 for (j=0;j<size_of_constraint;j++) { 4433 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4434 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4435 if (!PetscBTLookup(is_primal,row_B)) { 4436 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4437 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4438 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4439 } else { 4440 for (k=0;k<size_of_constraint;k++) { 4441 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4442 if (row != col) { 4443 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4444 } else { 4445 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4446 } 4447 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4448 } 4449 } 4450 } 4451 if (pcbddc->dbg_flag) { 4452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4453 } 4454 } 4455 } else { 4456 if (pcbddc->dbg_flag) { 4457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4458 } 4459 } 4460 } 4461 4462 /* free workspace */ 4463 if (qr_needed) { 4464 if (pcbddc->dbg_flag) { 4465 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4466 } 4467 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4468 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4469 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4470 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4471 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4472 } 4473 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4474 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4475 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4476 4477 /* assembling of global change of variable */ 4478 if (!pcbddc->fake_change) { 4479 Mat tmat; 4480 PetscInt bs; 4481 4482 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4483 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4484 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4485 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4486 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4487 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4488 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4489 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4490 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4491 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4492 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4493 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4494 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4495 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4496 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4497 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4498 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4499 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4500 4501 /* check */ 4502 if (pcbddc->dbg_flag) { 4503 PetscReal error; 4504 Vec x,x_change; 4505 4506 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4507 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4508 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4509 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4510 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4511 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4512 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4513 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4514 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4515 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4516 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4517 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4518 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4520 ierr = VecDestroy(&x);CHKERRQ(ierr); 4521 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4522 } 4523 /* adapt sub_schurs computed (if any) */ 4524 if (pcbddc->use_deluxe_scaling) { 4525 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4526 4527 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4528 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4529 } 4530 if (sub_schurs && sub_schurs->S_Ej_all) { 4531 Mat S_new,tmat; 4532 IS is_all_N,is_V_Sall = NULL; 4533 4534 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4535 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4536 if (pcbddc->deluxe_zerorows) { 4537 ISLocalToGlobalMapping NtoSall; 4538 IS is_V; 4539 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4540 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4541 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4542 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4543 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4544 } 4545 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4546 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4547 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4548 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4549 if (pcbddc->deluxe_zerorows) { 4550 const PetscScalar *array; 4551 const PetscInt *idxs_V,*idxs_all; 4552 PetscInt i,n_V; 4553 4554 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4555 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4556 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4557 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4558 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4559 for (i=0;i<n_V;i++) { 4560 PetscScalar val; 4561 PetscInt idx; 4562 4563 idx = idxs_V[i]; 4564 val = array[idxs_all[idxs_V[i]]]; 4565 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4566 } 4567 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4568 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4570 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4571 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4572 } 4573 sub_schurs->S_Ej_all = S_new; 4574 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4575 if (sub_schurs->sum_S_Ej_all) { 4576 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4577 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4578 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4579 if (pcbddc->deluxe_zerorows) { 4580 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4581 } 4582 sub_schurs->sum_S_Ej_all = S_new; 4583 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4584 } 4585 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4586 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4587 } 4588 /* destroy any change of basis context in sub_schurs */ 4589 if (sub_schurs && sub_schurs->change) { 4590 PetscInt i; 4591 4592 for (i=0;i<sub_schurs->n_subs;i++) { 4593 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4594 } 4595 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4596 } 4597 } 4598 if (pcbddc->switch_static) { /* need to save the local change */ 4599 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4600 } else { 4601 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4602 } 4603 /* determine if any process has changed the pressures locally */ 4604 pcbddc->change_interior = pcbddc->benign_have_null; 4605 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4606 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4607 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4608 pcbddc->use_qr_single = qr_needed; 4609 } 4610 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4611 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4612 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4613 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4614 } else { 4615 Mat benign_global = NULL; 4616 if (pcbddc->benign_have_null) { 4617 Mat tmat; 4618 4619 pcbddc->change_interior = PETSC_TRUE; 4620 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4621 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4622 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4623 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4624 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4625 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4626 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4627 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4628 if (pcbddc->benign_change) { 4629 Mat M; 4630 4631 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4632 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4633 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4634 ierr = MatDestroy(&M);CHKERRQ(ierr); 4635 } else { 4636 Mat eye; 4637 PetscScalar *array; 4638 4639 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4640 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4641 for (i=0;i<pcis->n;i++) { 4642 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4643 } 4644 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4645 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4646 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4647 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4648 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4649 } 4650 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4651 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4652 } 4653 if (pcbddc->user_ChangeOfBasisMatrix) { 4654 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4655 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4656 } else if (pcbddc->benign_have_null) { 4657 pcbddc->ChangeOfBasisMatrix = benign_global; 4658 } 4659 } 4660 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4661 IS is_global; 4662 const PetscInt *gidxs; 4663 4664 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4665 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4666 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4667 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4668 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4669 } 4670 } 4671 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4672 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4673 } 4674 4675 if (!pcbddc->fake_change) { 4676 /* add pressure dofs to set of primal nodes for numbering purposes */ 4677 for (i=0;i<pcbddc->benign_n;i++) { 4678 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4679 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4680 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4681 pcbddc->local_primal_size_cc++; 4682 pcbddc->local_primal_size++; 4683 } 4684 4685 /* check if a new primal space has been introduced (also take into account benign trick) */ 4686 pcbddc->new_primal_space_local = PETSC_TRUE; 4687 if (olocal_primal_size == pcbddc->local_primal_size) { 4688 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4689 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4690 if (!pcbddc->new_primal_space_local) { 4691 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4692 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4693 } 4694 } 4695 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4696 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4697 } 4698 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4699 4700 /* flush dbg viewer */ 4701 if (pcbddc->dbg_flag) { 4702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4703 } 4704 4705 /* free workspace */ 4706 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4707 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4708 if (!pcbddc->adaptive_selection) { 4709 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4710 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4711 } else { 4712 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4713 pcbddc->adaptive_constraints_idxs_ptr, 4714 pcbddc->adaptive_constraints_data_ptr, 4715 pcbddc->adaptive_constraints_idxs, 4716 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4717 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4718 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4719 } 4720 PetscFunctionReturn(0); 4721 } 4722 4723 #undef __FUNCT__ 4724 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4725 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4726 { 4727 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4728 PC_IS *pcis = (PC_IS*)pc->data; 4729 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4730 PetscInt ierr,i,N; 4731 4732 PetscFunctionBegin; 4733 /* Reset previously computed graph */ 4734 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4735 /* Init local Graph struct */ 4736 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4737 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4738 4739 /* Check validity of the csr graph passed in by the user */ 4740 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4741 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4742 } 4743 4744 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4745 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4746 PetscInt *xadj,*adjncy; 4747 PetscInt nvtxs; 4748 PetscBool flg_row=PETSC_FALSE; 4749 4750 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4751 if (flg_row) { 4752 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4753 pcbddc->computed_rowadj = PETSC_TRUE; 4754 } 4755 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4756 } 4757 if (pcbddc->dbg_flag) { 4758 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4759 } 4760 4761 /* Setup of Graph */ 4762 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4763 4764 /* attach info on disconnected subdomains if present */ 4765 if (pcbddc->n_local_subs) { 4766 PetscInt *local_subs; 4767 4768 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4769 for (i=0;i<pcbddc->n_local_subs;i++) { 4770 const PetscInt *idxs; 4771 PetscInt nl,j; 4772 4773 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4774 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4775 for (j=0;j<nl;j++) { 4776 local_subs[idxs[j]] = i; 4777 } 4778 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4779 } 4780 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4781 pcbddc->mat_graph->local_subs = local_subs; 4782 } 4783 4784 /* Graph's connected components analysis */ 4785 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4786 PetscFunctionReturn(0); 4787 } 4788 4789 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4790 #undef __FUNCT__ 4791 #define __FUNCT__ "PCBDDCSubsetNumbering" 4792 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4793 { 4794 PetscSF sf; 4795 PetscLayout map; 4796 const PetscInt *idxs; 4797 PetscInt *leaf_data,*root_data,*gidxs; 4798 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4799 PetscInt n_n,nlocals,start,first_index; 4800 PetscMPIInt commsize; 4801 PetscBool first_found; 4802 PetscErrorCode ierr; 4803 4804 PetscFunctionBegin; 4805 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4806 if (subset_mult) { 4807 PetscCheckSameComm(subset,1,subset_mult,2); 4808 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4809 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4810 } 4811 /* create workspace layout for computing global indices of subset */ 4812 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4813 lbounds[0] = lbounds[1] = 0; 4814 for (i=0;i<n;i++) { 4815 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4816 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4817 } 4818 lbounds[0] = -lbounds[0]; 4819 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4820 gbounds[0] = -gbounds[0]; 4821 N = gbounds[1] - gbounds[0] + 1; 4822 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4823 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4824 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4825 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4826 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4827 4828 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4829 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4830 if (subset_mult) { 4831 const PetscInt* idxs_mult; 4832 4833 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4834 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4835 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4836 } else { 4837 for (i=0;i<n;i++) leaf_data[i] = 1; 4838 } 4839 /* local size of new subset */ 4840 n_n = 0; 4841 for (i=0;i<n;i++) n_n += leaf_data[i]; 4842 4843 /* global indexes in layout */ 4844 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4845 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4846 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4847 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4848 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4849 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4850 4851 /* reduce from leaves to roots */ 4852 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4853 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4854 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4855 4856 /* count indexes in local part of layout */ 4857 nlocals = 0; 4858 first_index = -1; 4859 first_found = PETSC_FALSE; 4860 for (i=0;i<Nl;i++) { 4861 if (!first_found && root_data[i]) { 4862 first_found = PETSC_TRUE; 4863 first_index = i; 4864 } 4865 nlocals += root_data[i]; 4866 } 4867 4868 /* cumulative of number of indexes and size of subset without holes */ 4869 #if defined(PETSC_HAVE_MPI_EXSCAN) 4870 start = 0; 4871 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4872 #else 4873 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4874 start = start-nlocals; 4875 #endif 4876 4877 if (N_n) { /* compute total size of new subset if requested */ 4878 *N_n = start + nlocals; 4879 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4880 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4881 } 4882 4883 /* adapt root data with cumulative */ 4884 if (first_found) { 4885 PetscInt old_index; 4886 4887 root_data[first_index] += start; 4888 old_index = first_index; 4889 for (i=first_index+1;i<Nl;i++) { 4890 if (root_data[i]) { 4891 root_data[i] += root_data[old_index]; 4892 old_index = i; 4893 } 4894 } 4895 } 4896 4897 /* from roots to leaves */ 4898 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4899 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4900 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4901 4902 /* create new IS with global indexes without holes */ 4903 if (subset_mult) { 4904 const PetscInt* idxs_mult; 4905 PetscInt cum; 4906 4907 cum = 0; 4908 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4909 for (i=0;i<n;i++) { 4910 PetscInt j; 4911 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4912 } 4913 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4914 } else { 4915 for (i=0;i<n;i++) { 4916 gidxs[i] = leaf_data[i]-1; 4917 } 4918 } 4919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4920 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4921 PetscFunctionReturn(0); 4922 } 4923 4924 /* this implements stabilized Gram-Schmidt */ 4925 #undef __FUNCT__ 4926 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4927 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4928 { 4929 PetscInt i,j; 4930 PetscScalar *alphas; 4931 PetscErrorCode ierr; 4932 4933 PetscFunctionBegin; 4934 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4935 for (i=0;i<n;i++) { 4936 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4937 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4938 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4939 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4940 } 4941 ierr = PetscFree(alphas);CHKERRQ(ierr); 4942 PetscFunctionReturn(0); 4943 } 4944 4945 #undef __FUNCT__ 4946 #define __FUNCT__ "MatISGetSubassemblingPattern" 4947 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4948 { 4949 Mat A; 4950 PetscInt n_neighs,*neighs,*n_shared,**shared; 4951 PetscMPIInt size,rank,color; 4952 PetscInt *xadj,*adjncy; 4953 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4954 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4955 PetscInt void_procs,*procs_candidates = NULL; 4956 PetscInt xadj_count, *count; 4957 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4958 PetscSubcomm psubcomm; 4959 MPI_Comm subcomm; 4960 PetscErrorCode ierr; 4961 4962 PetscFunctionBegin; 4963 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4964 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4965 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4966 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4967 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4968 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4969 4970 if (have_void) *have_void = PETSC_FALSE; 4971 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4972 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4973 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4974 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4975 im_active = !!(n); 4976 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4977 void_procs = size - active_procs; 4978 /* get ranks of of non-active processes in mat communicator */ 4979 if (void_procs) { 4980 PetscInt ncand; 4981 4982 if (have_void) *have_void = PETSC_TRUE; 4983 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4984 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4985 for (i=0,ncand=0;i<size;i++) { 4986 if (!procs_candidates[i]) { 4987 procs_candidates[ncand++] = i; 4988 } 4989 } 4990 /* force n_subdomains to be not greater that the number of non-active processes */ 4991 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4992 } 4993 4994 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4995 if (active_procs < *n_subdomains) { 4996 PetscInt issize,isidx; 4997 if (im_active) { 4998 issize = 1; 4999 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5000 isidx = procs_candidates[rank]; 5001 } else { 5002 isidx = rank; 5003 } 5004 } else { 5005 issize = 0; 5006 isidx = -1; 5007 } 5008 *n_subdomains = active_procs; 5009 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5010 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5011 PetscFunctionReturn(0); 5012 } 5013 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5014 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5015 threshold = PetscMax(threshold,2); 5016 5017 /* Get info on mapping */ 5018 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5019 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5020 5021 /* build local CSR graph of subdomains' connectivity */ 5022 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5023 xadj[0] = 0; 5024 xadj[1] = PetscMax(n_neighs-1,0); 5025 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5026 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5027 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5028 for (i=1;i<n_neighs;i++) 5029 for (j=0;j<n_shared[i];j++) 5030 count[shared[i][j]] += 1; 5031 5032 xadj_count = 0; 5033 for (i=1;i<n_neighs;i++) { 5034 for (j=0;j<n_shared[i];j++) { 5035 if (count[shared[i][j]] < threshold) { 5036 adjncy[xadj_count] = neighs[i]; 5037 adjncy_wgt[xadj_count] = n_shared[i]; 5038 xadj_count++; 5039 break; 5040 } 5041 } 5042 } 5043 xadj[1] = xadj_count; 5044 ierr = PetscFree(count);CHKERRQ(ierr); 5045 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5046 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5047 5048 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5049 5050 /* Restrict work on active processes only */ 5051 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5052 if (void_procs) { 5053 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5054 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5055 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5056 subcomm = PetscSubcommChild(psubcomm); 5057 } else { 5058 psubcomm = NULL; 5059 subcomm = PetscObjectComm((PetscObject)mat); 5060 } 5061 5062 v_wgt = NULL; 5063 if (!color) { 5064 ierr = PetscFree(xadj);CHKERRQ(ierr); 5065 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5066 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5067 } else { 5068 Mat subdomain_adj; 5069 IS new_ranks,new_ranks_contig; 5070 MatPartitioning partitioner; 5071 PetscInt rstart=0,rend=0; 5072 PetscInt *is_indices,*oldranks; 5073 PetscMPIInt size; 5074 PetscBool aggregate; 5075 5076 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5077 if (void_procs) { 5078 PetscInt prank = rank; 5079 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5080 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5081 for (i=0;i<xadj[1];i++) { 5082 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5083 } 5084 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5085 } else { 5086 oldranks = NULL; 5087 } 5088 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5089 if (aggregate) { /* TODO: all this part could be made more efficient */ 5090 PetscInt lrows,row,ncols,*cols; 5091 PetscMPIInt nrank; 5092 PetscScalar *vals; 5093 5094 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5095 lrows = 0; 5096 if (nrank<redprocs) { 5097 lrows = size/redprocs; 5098 if (nrank<size%redprocs) lrows++; 5099 } 5100 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5101 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5102 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5103 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5104 row = nrank; 5105 ncols = xadj[1]-xadj[0]; 5106 cols = adjncy; 5107 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5108 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5109 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5110 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5111 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5112 ierr = PetscFree(xadj);CHKERRQ(ierr); 5113 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5114 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5115 ierr = PetscFree(vals);CHKERRQ(ierr); 5116 if (use_vwgt) { 5117 Vec v; 5118 const PetscScalar *array; 5119 PetscInt nl; 5120 5121 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5122 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5123 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5124 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5125 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5126 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5127 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5128 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5129 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5130 ierr = VecDestroy(&v);CHKERRQ(ierr); 5131 } 5132 } else { 5133 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5134 if (use_vwgt) { 5135 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5136 v_wgt[0] = local_size; 5137 } 5138 } 5139 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5140 5141 /* Partition */ 5142 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5143 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5144 if (v_wgt) { 5145 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5146 } 5147 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5148 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5149 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5150 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5151 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5152 5153 /* renumber new_ranks to avoid "holes" in new set of processors */ 5154 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5155 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5156 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5157 if (!aggregate) { 5158 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5159 #if defined(PETSC_USE_DEBUG) 5160 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5161 #endif 5162 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5163 } else if (oldranks) { 5164 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5165 } else { 5166 ranks_send_to_idx[0] = is_indices[0]; 5167 } 5168 } else { 5169 PetscInt idxs[1]; 5170 PetscMPIInt tag; 5171 MPI_Request *reqs; 5172 5173 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5174 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5175 for (i=rstart;i<rend;i++) { 5176 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5177 } 5178 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5179 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5180 ierr = PetscFree(reqs);CHKERRQ(ierr); 5181 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5182 #if defined(PETSC_USE_DEBUG) 5183 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5184 #endif 5185 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5186 } else if (oldranks) { 5187 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5188 } else { 5189 ranks_send_to_idx[0] = idxs[0]; 5190 } 5191 } 5192 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5193 /* clean up */ 5194 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5195 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5196 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5197 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5198 } 5199 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5200 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5201 5202 /* assemble parallel IS for sends */ 5203 i = 1; 5204 if (!color) i=0; 5205 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5206 PetscFunctionReturn(0); 5207 } 5208 5209 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5210 5211 #undef __FUNCT__ 5212 #define __FUNCT__ "MatISSubassemble" 5213 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5214 { 5215 Mat local_mat; 5216 IS is_sends_internal; 5217 PetscInt rows,cols,new_local_rows; 5218 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5219 PetscBool ismatis,isdense,newisdense,destroy_mat; 5220 ISLocalToGlobalMapping l2gmap; 5221 PetscInt* l2gmap_indices; 5222 const PetscInt* is_indices; 5223 MatType new_local_type; 5224 /* buffers */ 5225 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5226 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5227 PetscInt *recv_buffer_idxs_local; 5228 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5229 /* MPI */ 5230 MPI_Comm comm,comm_n; 5231 PetscSubcomm subcomm; 5232 PetscMPIInt n_sends,n_recvs,commsize; 5233 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5234 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5235 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5236 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5237 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5238 PetscErrorCode ierr; 5239 5240 PetscFunctionBegin; 5241 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5242 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5243 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5244 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5245 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5246 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5247 PetscValidLogicalCollectiveBool(mat,reuse,6); 5248 PetscValidLogicalCollectiveInt(mat,nis,8); 5249 5250 /* further checks */ 5251 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5252 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5253 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5254 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5255 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5256 if (reuse && *mat_n) { 5257 PetscInt mrows,mcols,mnrows,mncols; 5258 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5259 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5260 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5261 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5262 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5263 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5264 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5265 } 5266 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5267 PetscValidLogicalCollectiveInt(mat,bs,0); 5268 5269 /* prepare IS for sending if not provided */ 5270 if (!is_sends) { 5271 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5272 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5273 } else { 5274 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5275 is_sends_internal = is_sends; 5276 } 5277 5278 /* get comm */ 5279 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5280 5281 /* compute number of sends */ 5282 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5283 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5284 5285 /* compute number of receives */ 5286 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5287 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5288 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5289 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5290 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5291 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5292 ierr = PetscFree(iflags);CHKERRQ(ierr); 5293 5294 /* restrict comm if requested */ 5295 subcomm = 0; 5296 destroy_mat = PETSC_FALSE; 5297 if (restrict_comm) { 5298 PetscMPIInt color,subcommsize; 5299 5300 color = 0; 5301 if (restrict_full) { 5302 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5303 } else { 5304 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5305 } 5306 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5307 subcommsize = commsize - subcommsize; 5308 /* check if reuse has been requested */ 5309 if (reuse) { 5310 if (*mat_n) { 5311 PetscMPIInt subcommsize2; 5312 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5313 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5314 comm_n = PetscObjectComm((PetscObject)*mat_n); 5315 } else { 5316 comm_n = PETSC_COMM_SELF; 5317 } 5318 } else { /* MAT_INITIAL_MATRIX */ 5319 PetscMPIInt rank; 5320 5321 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5322 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5323 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5324 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5325 comm_n = PetscSubcommChild(subcomm); 5326 } 5327 /* flag to destroy *mat_n if not significative */ 5328 if (color) destroy_mat = PETSC_TRUE; 5329 } else { 5330 comm_n = comm; 5331 } 5332 5333 /* prepare send/receive buffers */ 5334 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5335 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5336 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5337 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5338 if (nis) { 5339 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5340 } 5341 5342 /* Get data from local matrices */ 5343 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5344 /* TODO: See below some guidelines on how to prepare the local buffers */ 5345 /* 5346 send_buffer_vals should contain the raw values of the local matrix 5347 send_buffer_idxs should contain: 5348 - MatType_PRIVATE type 5349 - PetscInt size_of_l2gmap 5350 - PetscInt global_row_indices[size_of_l2gmap] 5351 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5352 */ 5353 else { 5354 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5355 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5356 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5357 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5358 send_buffer_idxs[1] = i; 5359 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5360 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5361 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5362 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5363 for (i=0;i<n_sends;i++) { 5364 ilengths_vals[is_indices[i]] = len*len; 5365 ilengths_idxs[is_indices[i]] = len+2; 5366 } 5367 } 5368 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5369 /* additional is (if any) */ 5370 if (nis) { 5371 PetscMPIInt psum; 5372 PetscInt j; 5373 for (j=0,psum=0;j<nis;j++) { 5374 PetscInt plen; 5375 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5376 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5377 psum += len+1; /* indices + lenght */ 5378 } 5379 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5380 for (j=0,psum=0;j<nis;j++) { 5381 PetscInt plen; 5382 const PetscInt *is_array_idxs; 5383 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5384 send_buffer_idxs_is[psum] = plen; 5385 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5386 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5387 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5388 psum += plen+1; /* indices + lenght */ 5389 } 5390 for (i=0;i<n_sends;i++) { 5391 ilengths_idxs_is[is_indices[i]] = psum; 5392 } 5393 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5394 } 5395 5396 buf_size_idxs = 0; 5397 buf_size_vals = 0; 5398 buf_size_idxs_is = 0; 5399 for (i=0;i<n_recvs;i++) { 5400 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5401 buf_size_vals += (PetscInt)olengths_vals[i]; 5402 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5403 } 5404 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5405 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5406 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5407 5408 /* get new tags for clean communications */ 5409 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5410 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5411 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5412 5413 /* allocate for requests */ 5414 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5415 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5416 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5417 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5418 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5419 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5420 5421 /* communications */ 5422 ptr_idxs = recv_buffer_idxs; 5423 ptr_vals = recv_buffer_vals; 5424 ptr_idxs_is = recv_buffer_idxs_is; 5425 for (i=0;i<n_recvs;i++) { 5426 source_dest = onodes[i]; 5427 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5428 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5429 ptr_idxs += olengths_idxs[i]; 5430 ptr_vals += olengths_vals[i]; 5431 if (nis) { 5432 source_dest = onodes_is[i]; 5433 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5434 ptr_idxs_is += olengths_idxs_is[i]; 5435 } 5436 } 5437 for (i=0;i<n_sends;i++) { 5438 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5439 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5440 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5441 if (nis) { 5442 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5443 } 5444 } 5445 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5446 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5447 5448 /* assemble new l2g map */ 5449 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5450 ptr_idxs = recv_buffer_idxs; 5451 new_local_rows = 0; 5452 for (i=0;i<n_recvs;i++) { 5453 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5454 ptr_idxs += olengths_idxs[i]; 5455 } 5456 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5457 ptr_idxs = recv_buffer_idxs; 5458 new_local_rows = 0; 5459 for (i=0;i<n_recvs;i++) { 5460 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5461 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5462 ptr_idxs += olengths_idxs[i]; 5463 } 5464 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5465 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5466 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5467 5468 /* infer new local matrix type from received local matrices type */ 5469 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5470 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5471 if (n_recvs) { 5472 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5473 ptr_idxs = recv_buffer_idxs; 5474 for (i=0;i<n_recvs;i++) { 5475 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5476 new_local_type_private = MATAIJ_PRIVATE; 5477 break; 5478 } 5479 ptr_idxs += olengths_idxs[i]; 5480 } 5481 switch (new_local_type_private) { 5482 case MATDENSE_PRIVATE: 5483 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5484 new_local_type = MATSEQAIJ; 5485 bs = 1; 5486 } else { /* if I receive only 1 dense matrix */ 5487 new_local_type = MATSEQDENSE; 5488 bs = 1; 5489 } 5490 break; 5491 case MATAIJ_PRIVATE: 5492 new_local_type = MATSEQAIJ; 5493 bs = 1; 5494 break; 5495 case MATBAIJ_PRIVATE: 5496 new_local_type = MATSEQBAIJ; 5497 break; 5498 case MATSBAIJ_PRIVATE: 5499 new_local_type = MATSEQSBAIJ; 5500 break; 5501 default: 5502 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5503 break; 5504 } 5505 } else { /* by default, new_local_type is seqdense */ 5506 new_local_type = MATSEQDENSE; 5507 bs = 1; 5508 } 5509 5510 /* create MATIS object if needed */ 5511 if (!reuse) { 5512 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5513 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5514 } else { 5515 /* it also destroys the local matrices */ 5516 if (*mat_n) { 5517 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5518 } else { /* this is a fake object */ 5519 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5520 } 5521 } 5522 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5523 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5524 5525 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5526 5527 /* Global to local map of received indices */ 5528 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5529 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5530 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5531 5532 /* restore attributes -> type of incoming data and its size */ 5533 buf_size_idxs = 0; 5534 for (i=0;i<n_recvs;i++) { 5535 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5536 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5537 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5538 } 5539 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5540 5541 /* set preallocation */ 5542 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5543 if (!newisdense) { 5544 PetscInt *new_local_nnz=0; 5545 5546 ptr_idxs = recv_buffer_idxs_local; 5547 if (n_recvs) { 5548 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5549 } 5550 for (i=0;i<n_recvs;i++) { 5551 PetscInt j; 5552 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5553 for (j=0;j<*(ptr_idxs+1);j++) { 5554 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5555 } 5556 } else { 5557 /* TODO */ 5558 } 5559 ptr_idxs += olengths_idxs[i]; 5560 } 5561 if (new_local_nnz) { 5562 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5563 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5564 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5565 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5566 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5567 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5568 } else { 5569 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5570 } 5571 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5572 } else { 5573 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5574 } 5575 5576 /* set values */ 5577 ptr_vals = recv_buffer_vals; 5578 ptr_idxs = recv_buffer_idxs_local; 5579 for (i=0;i<n_recvs;i++) { 5580 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5581 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5582 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5583 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5584 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5585 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5586 } else { 5587 /* TODO */ 5588 } 5589 ptr_idxs += olengths_idxs[i]; 5590 ptr_vals += olengths_vals[i]; 5591 } 5592 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5593 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5594 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5595 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5596 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5597 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5598 5599 #if 0 5600 if (!restrict_comm) { /* check */ 5601 Vec lvec,rvec; 5602 PetscReal infty_error; 5603 5604 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5605 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5606 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5607 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5608 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5609 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5610 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5611 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5612 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5613 } 5614 #endif 5615 5616 /* assemble new additional is (if any) */ 5617 if (nis) { 5618 PetscInt **temp_idxs,*count_is,j,psum; 5619 5620 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5621 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5622 ptr_idxs = recv_buffer_idxs_is; 5623 psum = 0; 5624 for (i=0;i<n_recvs;i++) { 5625 for (j=0;j<nis;j++) { 5626 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5627 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5628 psum += plen; 5629 ptr_idxs += plen+1; /* shift pointer to received data */ 5630 } 5631 } 5632 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5633 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5634 for (i=1;i<nis;i++) { 5635 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5636 } 5637 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5638 ptr_idxs = recv_buffer_idxs_is; 5639 for (i=0;i<n_recvs;i++) { 5640 for (j=0;j<nis;j++) { 5641 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5642 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5643 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5644 ptr_idxs += plen+1; /* shift pointer to received data */ 5645 } 5646 } 5647 for (i=0;i<nis;i++) { 5648 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5649 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5650 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5651 } 5652 ierr = PetscFree(count_is);CHKERRQ(ierr); 5653 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5654 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5655 } 5656 /* free workspace */ 5657 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5658 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5659 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5660 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5661 if (isdense) { 5662 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5663 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5664 } else { 5665 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5666 } 5667 if (nis) { 5668 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5669 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5670 } 5671 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5672 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5673 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5674 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5675 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5676 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5677 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5678 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5679 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5680 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5681 ierr = PetscFree(onodes);CHKERRQ(ierr); 5682 if (nis) { 5683 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5684 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5685 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5686 } 5687 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5688 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5689 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5690 for (i=0;i<nis;i++) { 5691 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5692 } 5693 *mat_n = NULL; 5694 } 5695 PetscFunctionReturn(0); 5696 } 5697 5698 /* temporary hack into ksp private data structure */ 5699 #include <petsc/private/kspimpl.h> 5700 5701 #undef __FUNCT__ 5702 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5703 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5704 { 5705 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5706 PC_IS *pcis = (PC_IS*)pc->data; 5707 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5708 MatNullSpace CoarseNullSpace = NULL; 5709 ISLocalToGlobalMapping coarse_islg; 5710 IS coarse_is,*isarray; 5711 PetscInt i,im_active=-1,active_procs=-1; 5712 PetscInt nis,nisdofs,nisneu,nisvert; 5713 PC pc_temp; 5714 PCType coarse_pc_type; 5715 KSPType coarse_ksp_type; 5716 PetscBool multilevel_requested,multilevel_allowed; 5717 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5718 Mat t_coarse_mat_is; 5719 PetscInt ncoarse; 5720 PetscBool compute_vecs = PETSC_FALSE; 5721 PetscScalar *array; 5722 MatReuse coarse_mat_reuse; 5723 PetscBool restr, full_restr, have_void; 5724 PetscErrorCode ierr; 5725 5726 PetscFunctionBegin; 5727 /* Assign global numbering to coarse dofs */ 5728 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5729 PetscInt ocoarse_size; 5730 compute_vecs = PETSC_TRUE; 5731 ocoarse_size = pcbddc->coarse_size; 5732 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5733 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5734 /* see if we can avoid some work */ 5735 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5736 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5737 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5738 PC pc; 5739 PetscBool isbddc; 5740 5741 /* temporary workaround since PCBDDC does not have a reset method so far */ 5742 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5743 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5744 if (isbddc) { 5745 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5746 } else { 5747 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5748 } 5749 coarse_reuse = PETSC_FALSE; 5750 } else { /* we can safely reuse already computed coarse matrix */ 5751 coarse_reuse = PETSC_TRUE; 5752 } 5753 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5754 coarse_reuse = PETSC_FALSE; 5755 } 5756 /* reset any subassembling information */ 5757 if (!coarse_reuse || pcbddc->recompute_topography) { 5758 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5759 } 5760 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5761 coarse_reuse = PETSC_TRUE; 5762 } 5763 /* assemble coarse matrix */ 5764 if (coarse_reuse && pcbddc->coarse_ksp) { 5765 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5766 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5767 coarse_mat_reuse = MAT_REUSE_MATRIX; 5768 } else { 5769 coarse_mat = NULL; 5770 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5771 } 5772 5773 /* creates temporary l2gmap and IS for coarse indexes */ 5774 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5775 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5776 5777 /* creates temporary MATIS object for coarse matrix */ 5778 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5779 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5780 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5781 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5782 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5783 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5784 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5785 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5786 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5787 5788 /* count "active" (i.e. with positive local size) and "void" processes */ 5789 im_active = !!(pcis->n); 5790 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5791 5792 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5793 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5794 /* full_restr : just use the receivers from the subassembling pattern */ 5795 coarse_mat_is = NULL; 5796 multilevel_allowed = PETSC_FALSE; 5797 multilevel_requested = PETSC_FALSE; 5798 full_restr = PETSC_TRUE; 5799 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5800 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5801 if (multilevel_requested) { 5802 ncoarse = active_procs/pcbddc->coarsening_ratio; 5803 restr = PETSC_FALSE; 5804 full_restr = PETSC_FALSE; 5805 } else { 5806 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5807 restr = PETSC_TRUE; 5808 full_restr = PETSC_TRUE; 5809 } 5810 ncoarse = PetscMax(1,ncoarse); 5811 if (!pcbddc->coarse_subassembling) { 5812 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5813 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5814 PetscInt psum; 5815 PetscMPIInt size; 5816 if (pcbddc->coarse_ksp) psum = 1; 5817 else psum = 0; 5818 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5819 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5820 if (ncoarse < size) have_void = PETSC_TRUE; 5821 } 5822 /* determine if we can go multilevel */ 5823 if (multilevel_requested) { 5824 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5825 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5826 } 5827 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5828 5829 /* dump subassembling pattern */ 5830 if (pcbddc->dbg_flag && multilevel_allowed) { 5831 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5832 } 5833 5834 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5835 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5836 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5837 const PetscInt *idxs; 5838 ISLocalToGlobalMapping tmap; 5839 5840 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5841 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5842 /* allocate space for temporary storage */ 5843 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5844 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5845 /* allocate for IS array */ 5846 nisdofs = pcbddc->n_ISForDofsLocal; 5847 nisneu = !!pcbddc->NeumannBoundariesLocal; 5848 nisvert = 0; /* nisvert is not used */ 5849 nis = nisdofs + nisneu + nisvert; 5850 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5851 /* dofs splitting */ 5852 for (i=0;i<nisdofs;i++) { 5853 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5854 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5855 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5856 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5857 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5858 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5859 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5860 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5861 } 5862 /* neumann boundaries */ 5863 if (pcbddc->NeumannBoundariesLocal) { 5864 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5865 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5866 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5867 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5868 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5869 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5870 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5871 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5872 } 5873 /* free memory */ 5874 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5875 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5876 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5877 } else { 5878 nis = 0; 5879 nisdofs = 0; 5880 nisneu = 0; 5881 nisvert = 0; 5882 isarray = NULL; 5883 } 5884 /* destroy no longer needed map */ 5885 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5886 5887 /* subassemble */ 5888 if (multilevel_allowed) { 5889 PetscBool reuse,reuser; 5890 if (coarse_mat) reuse = PETSC_TRUE; 5891 else reuse = PETSC_FALSE; 5892 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5893 if (reuser) { 5894 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5895 } else { 5896 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5897 } 5898 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5899 } else { 5900 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5901 } 5902 if (coarse_mat_is || coarse_mat) { 5903 PetscMPIInt size; 5904 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5905 if (!multilevel_allowed) { 5906 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5907 } else { 5908 Mat A; 5909 5910 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5911 if (coarse_mat_is) { 5912 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5913 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5914 coarse_mat = coarse_mat_is; 5915 } 5916 /* be sure we don't have MatSeqDENSE as local mat */ 5917 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5918 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5919 } 5920 } 5921 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5922 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5923 5924 /* create local to global scatters for coarse problem */ 5925 if (compute_vecs) { 5926 PetscInt lrows; 5927 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5928 if (coarse_mat) { 5929 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5930 } else { 5931 lrows = 0; 5932 } 5933 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5934 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5935 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5936 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5937 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5938 } 5939 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5940 5941 /* set defaults for coarse KSP and PC */ 5942 if (multilevel_allowed) { 5943 coarse_ksp_type = KSPRICHARDSON; 5944 coarse_pc_type = PCBDDC; 5945 } else { 5946 coarse_ksp_type = KSPPREONLY; 5947 coarse_pc_type = PCREDUNDANT; 5948 } 5949 5950 /* print some info if requested */ 5951 if (pcbddc->dbg_flag) { 5952 if (!multilevel_allowed) { 5953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5954 if (multilevel_requested) { 5955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5956 } else if (pcbddc->max_levels) { 5957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5958 } 5959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5960 } 5961 } 5962 5963 /* create the coarse KSP object only once with defaults */ 5964 if (coarse_mat) { 5965 PetscViewer dbg_viewer = NULL; 5966 if (pcbddc->dbg_flag) { 5967 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5968 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5969 } 5970 if (!pcbddc->coarse_ksp) { 5971 char prefix[256],str_level[16]; 5972 size_t len; 5973 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5974 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5975 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5976 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5977 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5978 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5979 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5980 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5981 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5982 /* prefix */ 5983 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5984 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5985 if (!pcbddc->current_level) { 5986 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5987 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5988 } else { 5989 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5990 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5991 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5992 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5993 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5994 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5995 } 5996 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5997 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5998 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5999 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6000 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6001 /* allow user customization */ 6002 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6003 } 6004 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6005 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6006 if (nisdofs) { 6007 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6008 for (i=0;i<nisdofs;i++) { 6009 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6010 } 6011 } 6012 if (nisneu) { 6013 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6014 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6015 } 6016 if (nisvert) { 6017 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6018 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6019 } 6020 6021 /* get some info after set from options */ 6022 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6023 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6024 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6025 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6026 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6027 isbddc = PETSC_FALSE; 6028 } 6029 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6030 if (isredundant) { 6031 KSP inner_ksp; 6032 PC inner_pc; 6033 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6034 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6035 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6036 } 6037 6038 /* parameters which miss an API */ 6039 if (isbddc) { 6040 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6041 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6042 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6043 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6044 if (pcbddc_coarse->benign_saddle_point) { 6045 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 6046 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6047 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6048 } 6049 } 6050 6051 /* propagate symmetry info of coarse matrix */ 6052 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6053 if (pc->pmat->symmetric_set) { 6054 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6055 } 6056 if (pc->pmat->hermitian_set) { 6057 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6058 } 6059 if (pc->pmat->spd_set) { 6060 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6061 } 6062 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6063 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6064 } 6065 /* set operators */ 6066 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6067 if (pcbddc->dbg_flag) { 6068 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6069 } 6070 } 6071 ierr = PetscFree(isarray);CHKERRQ(ierr); 6072 #if 0 6073 { 6074 PetscViewer viewer; 6075 char filename[256]; 6076 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6077 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6078 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6079 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6080 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6081 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6082 } 6083 #endif 6084 6085 if (pcbddc->coarse_ksp) { 6086 Vec crhs,csol; 6087 6088 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6089 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6090 if (!csol) { 6091 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6092 } 6093 if (!crhs) { 6094 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6095 } 6096 } 6097 6098 /* compute null space for coarse solver if the benign trick has been requested */ 6099 if (pcbddc->benign_null) { 6100 6101 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6102 for (i=0;i<pcbddc->benign_n;i++) { 6103 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6104 } 6105 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6106 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6107 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6108 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6109 if (coarse_mat) { 6110 Vec nullv; 6111 PetscScalar *array,*array2; 6112 PetscInt nl; 6113 6114 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6115 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6116 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6117 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6118 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6119 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6120 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6121 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6122 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6123 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6124 } 6125 } 6126 6127 if (pcbddc->coarse_ksp) { 6128 PetscBool ispreonly; 6129 6130 if (CoarseNullSpace) { 6131 PetscBool isnull; 6132 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6133 if (isnull) { 6134 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6135 } 6136 /* TODO: add local nullspaces (if any) */ 6137 } 6138 /* setup coarse ksp */ 6139 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6140 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6141 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6142 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6143 KSP check_ksp; 6144 KSPType check_ksp_type; 6145 PC check_pc; 6146 Vec check_vec,coarse_vec; 6147 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6148 PetscInt its; 6149 PetscBool compute_eigs; 6150 PetscReal *eigs_r,*eigs_c; 6151 PetscInt neigs; 6152 const char *prefix; 6153 6154 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6155 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6156 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6157 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6158 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6159 /* prevent from setup unneeded object */ 6160 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6161 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6162 if (ispreonly) { 6163 check_ksp_type = KSPPREONLY; 6164 compute_eigs = PETSC_FALSE; 6165 } else { 6166 check_ksp_type = KSPGMRES; 6167 compute_eigs = PETSC_TRUE; 6168 } 6169 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6170 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6171 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6172 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6173 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6174 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6175 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6176 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6177 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6178 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6179 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6180 /* create random vec */ 6181 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6182 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6183 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6184 /* solve coarse problem */ 6185 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6186 /* set eigenvalue estimation if preonly has not been requested */ 6187 if (compute_eigs) { 6188 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6189 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6190 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6191 lambda_max = eigs_r[neigs-1]; 6192 lambda_min = eigs_r[0]; 6193 if (pcbddc->use_coarse_estimates) { 6194 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6195 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6196 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6197 } 6198 } 6199 } 6200 6201 /* check coarse problem residual error */ 6202 if (pcbddc->dbg_flag) { 6203 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6204 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6205 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6206 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6207 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6208 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6209 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6210 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6211 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6212 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6213 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6214 if (CoarseNullSpace) { 6215 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6216 } 6217 if (compute_eigs) { 6218 PetscReal lambda_max_s,lambda_min_s; 6219 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6220 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6221 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6222 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6223 for (i=0;i<neigs;i++) { 6224 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6225 } 6226 } 6227 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6228 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6229 } 6230 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6231 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6232 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6233 if (compute_eigs) { 6234 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6235 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6236 } 6237 } 6238 } 6239 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6240 /* print additional info */ 6241 if (pcbddc->dbg_flag) { 6242 /* waits until all processes reaches this point */ 6243 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6244 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6246 } 6247 6248 /* free memory */ 6249 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6250 PetscFunctionReturn(0); 6251 } 6252 6253 #undef __FUNCT__ 6254 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6255 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6256 { 6257 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6258 PC_IS* pcis = (PC_IS*)pc->data; 6259 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6260 IS subset,subset_mult,subset_n; 6261 PetscInt local_size,coarse_size=0; 6262 PetscInt *local_primal_indices=NULL; 6263 const PetscInt *t_local_primal_indices; 6264 PetscErrorCode ierr; 6265 6266 PetscFunctionBegin; 6267 /* Compute global number of coarse dofs */ 6268 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6270 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6271 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6272 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6273 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6274 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6275 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6276 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6277 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6278 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6279 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6280 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6281 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6282 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6283 6284 /* check numbering */ 6285 if (pcbddc->dbg_flag) { 6286 PetscScalar coarsesum,*array,*array2; 6287 PetscInt i; 6288 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6289 6290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6292 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6293 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6294 /* counter */ 6295 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6296 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6297 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6298 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6299 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6300 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6301 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6302 for (i=0;i<pcbddc->local_primal_size;i++) { 6303 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6304 } 6305 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6306 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6307 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6308 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6309 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6310 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6311 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6312 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6313 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6314 for (i=0;i<pcis->n;i++) { 6315 if (array[i] != 0.0 && array[i] != array2[i]) { 6316 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6317 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6318 set_error = PETSC_TRUE; 6319 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6321 } 6322 } 6323 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6324 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6326 for (i=0;i<pcis->n;i++) { 6327 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6328 } 6329 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6330 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6331 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6332 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6333 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6335 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6336 PetscInt *gidxs; 6337 6338 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6339 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6341 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6343 for (i=0;i<pcbddc->local_primal_size;i++) { 6344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6345 } 6346 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6347 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6348 } 6349 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6350 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6351 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6352 } 6353 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6354 /* get back data */ 6355 *coarse_size_n = coarse_size; 6356 *local_primal_indices_n = local_primal_indices; 6357 PetscFunctionReturn(0); 6358 } 6359 6360 #undef __FUNCT__ 6361 #define __FUNCT__ "PCBDDCGlobalToLocal" 6362 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6363 { 6364 IS localis_t; 6365 PetscInt i,lsize,*idxs,n; 6366 PetscScalar *vals; 6367 PetscErrorCode ierr; 6368 6369 PetscFunctionBegin; 6370 /* get indices in local ordering exploiting local to global map */ 6371 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6372 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6373 for (i=0;i<lsize;i++) vals[i] = 1.0; 6374 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6375 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6376 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6377 if (idxs) { /* multilevel guard */ 6378 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6379 } 6380 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6381 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6382 ierr = PetscFree(vals);CHKERRQ(ierr); 6383 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6384 /* now compute set in local ordering */ 6385 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6386 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6387 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6388 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6389 for (i=0,lsize=0;i<n;i++) { 6390 if (PetscRealPart(vals[i]) > 0.5) { 6391 lsize++; 6392 } 6393 } 6394 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6395 for (i=0,lsize=0;i<n;i++) { 6396 if (PetscRealPart(vals[i]) > 0.5) { 6397 idxs[lsize++] = i; 6398 } 6399 } 6400 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6401 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6402 *localis = localis_t; 6403 PetscFunctionReturn(0); 6404 } 6405 6406 #undef __FUNCT__ 6407 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6408 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6409 { 6410 PC_IS *pcis=(PC_IS*)pc->data; 6411 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6412 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6413 Mat S_j; 6414 PetscInt *used_xadj,*used_adjncy; 6415 PetscBool free_used_adj; 6416 PetscErrorCode ierr; 6417 6418 PetscFunctionBegin; 6419 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6420 free_used_adj = PETSC_FALSE; 6421 if (pcbddc->sub_schurs_layers == -1) { 6422 used_xadj = NULL; 6423 used_adjncy = NULL; 6424 } else { 6425 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6426 used_xadj = pcbddc->mat_graph->xadj; 6427 used_adjncy = pcbddc->mat_graph->adjncy; 6428 } else if (pcbddc->computed_rowadj) { 6429 used_xadj = pcbddc->mat_graph->xadj; 6430 used_adjncy = pcbddc->mat_graph->adjncy; 6431 } else { 6432 PetscBool flg_row=PETSC_FALSE; 6433 const PetscInt *xadj,*adjncy; 6434 PetscInt nvtxs; 6435 6436 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6437 if (flg_row) { 6438 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6439 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6440 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6441 free_used_adj = PETSC_TRUE; 6442 } else { 6443 pcbddc->sub_schurs_layers = -1; 6444 used_xadj = NULL; 6445 used_adjncy = NULL; 6446 } 6447 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6448 } 6449 } 6450 6451 /* setup sub_schurs data */ 6452 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6453 if (!sub_schurs->schur_explicit) { 6454 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6455 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6456 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6457 } else { 6458 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6459 PetscBool isseqaij,need_change = PETSC_FALSE;; 6460 PetscInt benign_n; 6461 Mat change = NULL; 6462 Vec scaling = NULL; 6463 IS change_primal = NULL; 6464 6465 if (!pcbddc->use_vertices && reuse_solvers) { 6466 PetscInt n_vertices; 6467 6468 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6469 reuse_solvers = (PetscBool)!n_vertices; 6470 } 6471 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6472 if (!isseqaij) { 6473 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6474 if (matis->A == pcbddc->local_mat) { 6475 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6476 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6477 } else { 6478 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6479 } 6480 } 6481 if (!pcbddc->benign_change_explicit) { 6482 benign_n = pcbddc->benign_n; 6483 } else { 6484 benign_n = 0; 6485 } 6486 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6487 We need a global reduction to avoid possible deadlocks. 6488 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6489 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6490 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6491 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6492 need_change = (PetscBool)(!need_change); 6493 } 6494 /* If the user defines additional constraints, we import them here. 6495 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6496 if (need_change) { 6497 PC_IS *pcisf; 6498 PC_BDDC *pcbddcf; 6499 PC pcf; 6500 6501 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6502 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6503 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6504 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6505 /* hacks */ 6506 pcisf = (PC_IS*)pcf->data; 6507 pcisf->is_B_local = pcis->is_B_local; 6508 pcisf->vec1_N = pcis->vec1_N; 6509 pcisf->BtoNmap = pcis->BtoNmap; 6510 pcisf->n = pcis->n; 6511 pcisf->n_B = pcis->n_B; 6512 pcbddcf = (PC_BDDC*)pcf->data; 6513 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6514 pcbddcf->mat_graph = pcbddc->mat_graph; 6515 pcbddcf->use_faces = PETSC_TRUE; 6516 pcbddcf->use_change_of_basis = PETSC_TRUE; 6517 pcbddcf->use_change_on_faces = PETSC_TRUE; 6518 pcbddcf->use_qr_single = PETSC_TRUE; 6519 pcbddcf->fake_change = PETSC_TRUE; 6520 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6521 /* store information on primal vertices and change of basis (in local numbering) */ 6522 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6523 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6524 change = pcbddcf->ConstraintMatrix; 6525 pcbddcf->ConstraintMatrix = NULL; 6526 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6527 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6528 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6529 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6530 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6531 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6532 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6533 pcf->ops->destroy = NULL; 6534 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6535 } 6536 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6537 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6538 ierr = MatDestroy(&change);CHKERRQ(ierr); 6539 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6540 } 6541 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6542 6543 /* free adjacency */ 6544 if (free_used_adj) { 6545 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6546 } 6547 PetscFunctionReturn(0); 6548 } 6549 6550 #undef __FUNCT__ 6551 #define __FUNCT__ "PCBDDCInitSubSchurs" 6552 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6553 { 6554 PC_IS *pcis=(PC_IS*)pc->data; 6555 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6556 PCBDDCGraph graph; 6557 PetscErrorCode ierr; 6558 6559 PetscFunctionBegin; 6560 /* attach interface graph for determining subsets */ 6561 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6562 IS verticesIS,verticescomm; 6563 PetscInt vsize,*idxs; 6564 6565 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6566 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6567 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6568 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6569 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6570 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6571 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6572 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6573 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6574 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6575 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6576 } else { 6577 graph = pcbddc->mat_graph; 6578 } 6579 /* print some info */ 6580 if (pcbddc->dbg_flag) { 6581 IS vertices; 6582 PetscInt nv,nedges,nfaces; 6583 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6584 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6585 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6586 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6587 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6593 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6594 } 6595 6596 /* sub_schurs init */ 6597 if (!pcbddc->sub_schurs) { 6598 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6599 } 6600 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6601 6602 /* free graph struct */ 6603 if (pcbddc->sub_schurs_rebuild) { 6604 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6605 } 6606 PetscFunctionReturn(0); 6607 } 6608 6609 #undef __FUNCT__ 6610 #define __FUNCT__ "PCBDDCCheckOperator" 6611 PetscErrorCode PCBDDCCheckOperator(PC pc) 6612 { 6613 PC_IS *pcis=(PC_IS*)pc->data; 6614 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6615 PetscErrorCode ierr; 6616 6617 PetscFunctionBegin; 6618 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6619 IS zerodiag = NULL; 6620 Mat S_j,B0_B=NULL; 6621 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6622 PetscScalar *p0_check,*array,*array2; 6623 PetscReal norm; 6624 PetscInt i; 6625 6626 /* B0 and B0_B */ 6627 if (zerodiag) { 6628 IS dummy; 6629 6630 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6631 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6632 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6633 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6634 } 6635 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6636 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6637 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6638 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6639 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6640 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6641 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6642 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6643 /* S_j */ 6644 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6645 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6646 6647 /* mimic vector in \widetilde{W}_\Gamma */ 6648 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6649 /* continuous in primal space */ 6650 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6651 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6652 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6653 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6654 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6655 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6656 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6657 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6658 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6659 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6660 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6661 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6662 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6663 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6664 6665 /* assemble rhs for coarse problem */ 6666 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6667 /* local with Schur */ 6668 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6669 if (zerodiag) { 6670 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6671 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6672 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6673 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6674 } 6675 /* sum on primal nodes the local contributions */ 6676 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6677 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6678 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6679 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6680 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6681 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6682 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6683 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6684 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6685 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6686 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6687 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6688 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6689 /* scale primal nodes (BDDC sums contibutions) */ 6690 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6691 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6692 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6693 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6694 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6695 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6696 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6697 /* global: \widetilde{B0}_B w_\Gamma */ 6698 if (zerodiag) { 6699 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6700 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6701 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6702 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6703 } 6704 /* BDDC */ 6705 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6706 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6707 6708 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6709 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6710 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6711 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6712 for (i=0;i<pcbddc->benign_n;i++) { 6713 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6714 } 6715 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6716 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6717 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6718 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6719 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6720 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6721 } 6722 PetscFunctionReturn(0); 6723 } 6724