1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 PetscErrorCode ierr; 13 ISLocalToGlobalMapping rmap; 14 IS *faces,*edges; 15 Vec p,v,quad_vec,*quad_vecs; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 PetscMPIInt rank; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 51 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 52 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 53 for (i=0;i<maxneighs;i++) { 54 PetscInt first,last; 55 56 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 57 /* the near-null space fo BDDC carries information on quadrature weights, 58 and these can be collinear -> so cheat with MatNullSpaceCreate 59 and create a suitable set of basis vectors first */ 60 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 61 if (i>=first && i < last) { 62 PetscScalar *data; 63 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 64 data[i-first] = 1.; 65 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 66 } 67 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 68 } 69 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 70 for (i=0;i<maxneighs;i++) { /* reset vectors */ 71 PetscInt first,last; 72 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 73 if (i>=first && i < last) { 74 PetscScalar *data; 75 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 76 data[i-first] = 0.; 77 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 78 } 79 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 80 } 81 /* compute local quad vec */ 82 ierr = MatCreateVecs(divudotp,&v,&p);CHKERRQ(ierr); 83 ierr = VecSet(p,1.);CHKERRQ(ierr); 84 ierr = MatMultTranspose(divudotp,p,v);CHKERRQ(ierr); 85 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 86 ierr = VecDestroy(&p);CHKERRQ(ierr); 87 /* insert in global quadrature vecs */ 88 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 89 for (i=0;i<nf;i++) { 90 const PetscInt *idxs; 91 PetscInt idx,nn,j; 92 93 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 95 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 96 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 97 idx = -(idx+1); 98 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 99 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 100 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 101 } 102 for (i=0;i<ne;i++) { 103 const PetscInt *idxs; 104 PetscInt idx,nn,j; 105 106 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 107 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 108 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 109 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 110 idx = -(idx+1); 111 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 112 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 113 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 114 } 115 ierr = PetscFree(edges);CHKERRQ(ierr); 116 ierr = PetscFree(faces);CHKERRQ(ierr); 117 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 118 ierr = VecDestroy(&v);CHKERRQ(ierr); 119 ierr = PetscFree(vals);CHKERRQ(ierr); 120 121 /* assemble near null space */ 122 for (i=0;i<maxneighs;i++) { 123 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 124 } 125 for (i=0;i<maxneighs;i++) { 126 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 127 } 128 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 135 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 136 { 137 PetscErrorCode ierr; 138 Vec local,global; 139 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 140 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 141 142 PetscFunctionBegin; 143 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 144 /* need to convert from global to local topology information and remove references to information in global ordering */ 145 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 146 if (pcbddc->user_provided_isfordofs) { 147 if (pcbddc->n_ISForDofs) { 148 PetscInt i; 149 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 150 for (i=0;i<pcbddc->n_ISForDofs;i++) { 151 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 152 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 153 } 154 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 155 pcbddc->n_ISForDofs = 0; 156 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 157 } 158 } else { 159 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 160 PetscInt i, n = matis->A->rmap->n; 161 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 162 if (i > 1) { 163 pcbddc->n_ISForDofsLocal = i; 164 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 165 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 166 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 167 } 168 } 169 } 170 } 171 172 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 174 } 175 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 176 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 177 } 178 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 179 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 180 } 181 ierr = VecDestroy(&global);CHKERRQ(ierr); 182 ierr = VecDestroy(&local);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 188 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 189 { 190 PC_IS *pcis = (PC_IS*)(pc->data); 191 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 192 PetscErrorCode ierr; 193 194 PetscFunctionBegin; 195 if (!pcbddc->benign_have_null) { 196 PetscFunctionReturn(0); 197 } 198 if (pcbddc->ChangeOfBasisMatrix) { 199 Vec swap; 200 201 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 202 swap = pcbddc->work_change; 203 pcbddc->work_change = r; 204 r = swap; 205 } 206 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 207 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 208 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 209 ierr = VecSet(z,0.);CHKERRQ(ierr); 210 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 211 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 212 if (pcbddc->ChangeOfBasisMatrix) { 213 Vec swap; 214 215 swap = r; 216 r = pcbddc->work_change; 217 pcbddc->work_change = swap; 218 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 219 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 220 } 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNCT__ 225 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 226 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 227 { 228 PCBDDCBenignMatMult_ctx ctx; 229 PetscErrorCode ierr; 230 PetscBool apply_right,apply_left,reset_x; 231 232 PetscFunctionBegin; 233 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 234 if (transpose) { 235 apply_right = ctx->apply_left; 236 apply_left = ctx->apply_right; 237 } else { 238 apply_right = ctx->apply_right; 239 apply_left = ctx->apply_left; 240 } 241 reset_x = PETSC_FALSE; 242 if (apply_right) { 243 const PetscScalar *ax; 244 PetscInt nl,i; 245 246 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 247 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 248 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 249 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 250 for (i=0;i<ctx->benign_n;i++) { 251 PetscScalar sum,val; 252 const PetscInt *idxs; 253 PetscInt nz,j; 254 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 255 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 256 sum = 0.; 257 if (ctx->apply_p0) { 258 val = ctx->work[idxs[nz-1]]; 259 for (j=0;j<nz-1;j++) { 260 sum += ctx->work[idxs[j]]; 261 ctx->work[idxs[j]] += val; 262 } 263 } else { 264 for (j=0;j<nz-1;j++) { 265 sum += ctx->work[idxs[j]]; 266 } 267 } 268 ctx->work[idxs[nz-1]] -= sum; 269 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 270 } 271 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 272 reset_x = PETSC_TRUE; 273 } 274 if (transpose) { 275 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 276 } else { 277 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 278 } 279 if (reset_x) { 280 ierr = VecResetArray(x);CHKERRQ(ierr); 281 } 282 if (apply_left) { 283 PetscScalar *ay; 284 PetscInt i; 285 286 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 287 for (i=0;i<ctx->benign_n;i++) { 288 PetscScalar sum,val; 289 const PetscInt *idxs; 290 PetscInt nz,j; 291 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 292 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 293 val = -ay[idxs[nz-1]]; 294 if (ctx->apply_p0) { 295 sum = 0.; 296 for (j=0;j<nz-1;j++) { 297 sum += ay[idxs[j]]; 298 ay[idxs[j]] += val; 299 } 300 ay[idxs[nz-1]] += sum; 301 } else { 302 for (j=0;j<nz-1;j++) { 303 ay[idxs[j]] += val; 304 } 305 ay[idxs[nz-1]] = 0.; 306 } 307 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 308 } 309 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 316 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 317 { 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 327 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 328 { 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignShellMat" 338 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 339 { 340 PC_IS *pcis = (PC_IS*)pc->data; 341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 342 PCBDDCBenignMatMult_ctx ctx; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 if (!restore) { 347 Mat A_IB,A_BI; 348 PetscScalar *work; 349 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 350 351 if (pcbddc->benign_original_mat) { 352 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 353 } 354 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 355 PetscFunctionReturn(0); 356 } 357 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 358 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 359 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 360 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 361 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 362 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 363 ierr = PetscNew(&ctx);CHKERRQ(ierr); 364 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 365 ctx->apply_left = PETSC_TRUE; 366 ctx->apply_right = PETSC_FALSE; 367 ctx->apply_p0 = PETSC_FALSE; 368 ctx->benign_n = pcbddc->benign_n; 369 if (reuse) { 370 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 371 ctx->free = PETSC_FALSE; 372 } else { /* TODO: could be optimized for successive solves */ 373 ISLocalToGlobalMapping N_to_D; 374 PetscInt i; 375 376 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 377 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 378 for (i=0;i<pcbddc->benign_n;i++) { 379 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 380 } 381 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 382 ctx->free = PETSC_TRUE; 383 } 384 ctx->A = pcis->A_IB; 385 ctx->work = work; 386 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 387 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 388 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 389 pcis->A_IB = A_IB; 390 391 /* A_BI as A_IB^T */ 392 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 393 pcbddc->benign_original_mat = pcis->A_BI; 394 pcis->A_BI = A_BI; 395 } else { 396 if (!pcbddc->benign_original_mat) { 397 PetscFunctionReturn(0); 398 } 399 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 401 pcis->A_IB = ctx->A; 402 ctx->A = NULL; 403 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 404 pcis->A_BI = pcbddc->benign_original_mat; 405 pcbddc->benign_original_mat = NULL; 406 if (ctx->free) { 407 PetscInt i; 408 for (i=0;i<ctx->benign_n;i++) { 409 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 410 } 411 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 412 } 413 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 414 ierr = PetscFree(ctx);CHKERRQ(ierr); 415 } 416 PetscFunctionReturn(0); 417 } 418 419 /* used just in bddc debug mode */ 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCBenignProject" 422 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 426 Mat An; 427 PetscErrorCode ierr; 428 429 PetscFunctionBegin; 430 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 431 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 432 if (is1) { 433 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 434 ierr = MatDestroy(&An);CHKERRQ(ierr); 435 } else { 436 *B = An; 437 } 438 PetscFunctionReturn(0); 439 } 440 441 /* TODO: add reuse flag */ 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatSeqAIJCompress" 444 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 445 { 446 Mat Bt; 447 PetscScalar *a,*bdata; 448 const PetscInt *ii,*ij; 449 PetscInt m,n,i,nnz,*bii,*bij; 450 PetscBool flg_row; 451 PetscErrorCode ierr; 452 453 PetscFunctionBegin; 454 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 455 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 456 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 457 nnz = n; 458 for (i=0;i<ii[n];i++) { 459 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 460 } 461 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 462 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 463 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 464 nnz = 0; 465 bii[0] = 0; 466 for (i=0;i<n;i++) { 467 PetscInt j; 468 for (j=ii[i];j<ii[i+1];j++) { 469 PetscScalar entry = a[j]; 470 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 471 bij[nnz] = ij[j]; 472 bdata[nnz] = entry; 473 nnz++; 474 } 475 } 476 bii[i+1] = nnz; 477 } 478 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 479 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 480 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 481 { 482 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 483 b->free_a = PETSC_TRUE; 484 b->free_ij = PETSC_TRUE; 485 } 486 *B = Bt; 487 PetscFunctionReturn(0); 488 } 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatDetectDisconnectedComponents" 492 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 493 { 494 Mat B; 495 IS is_dummy,*cc_n; 496 ISLocalToGlobalMapping l2gmap_dummy; 497 PCBDDCGraph graph; 498 PetscInt i,n; 499 PetscInt *xadj,*adjncy; 500 PetscInt *xadj_filtered,*adjncy_filtered; 501 PetscBool flg_row,isseqaij; 502 PetscErrorCode ierr; 503 504 PetscFunctionBegin; 505 if (!A->rmap->N || !A->cmap->N) { 506 *ncc = 0; 507 *cc = NULL; 508 PetscFunctionReturn(0); 509 } 510 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 511 if (!isseqaij && filter) { 512 PetscBool isseqdense; 513 514 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 515 if (!isseqdense) { 516 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 517 } else { /* TODO: rectangular case and LDA */ 518 PetscScalar *array; 519 PetscReal chop=1.e-6; 520 521 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 522 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 523 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 524 for (i=0;i<n;i++) { 525 PetscInt j; 526 for (j=i+1;j<n;j++) { 527 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 528 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 529 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 530 } 531 } 532 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 533 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 534 } 535 } else { 536 B = A; 537 } 538 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 539 540 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 541 if (filter) { 542 PetscScalar *data; 543 PetscInt j,cum; 544 545 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 546 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 547 cum = 0; 548 for (i=0;i<n;i++) { 549 PetscInt t; 550 551 for (j=xadj[i];j<xadj[i+1];j++) { 552 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 553 continue; 554 } 555 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 556 } 557 t = xadj_filtered[i]; 558 xadj_filtered[i] = cum; 559 cum += t; 560 } 561 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 562 } else { 563 xadj_filtered = NULL; 564 adjncy_filtered = NULL; 565 } 566 567 /* compute local connected components using PCBDDCGraph */ 568 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 569 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 570 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 571 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 572 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 573 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 574 if (xadj_filtered) { 575 graph->xadj = xadj_filtered; 576 graph->adjncy = adjncy_filtered; 577 } else { 578 graph->xadj = xadj; 579 graph->adjncy = adjncy; 580 } 581 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 582 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 583 /* partial clean up */ 584 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 585 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 586 if (A != B) { 587 ierr = MatDestroy(&B);CHKERRQ(ierr); 588 } 589 590 /* get back data */ 591 if (ncc) *ncc = graph->ncc; 592 if (cc) { 593 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 594 for (i=0;i<graph->ncc;i++) { 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 596 } 597 *cc = cc_n; 598 } 599 /* clean up graph */ 600 graph->xadj = 0; 601 graph->adjncy = 0; 602 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "PCBDDCBenignCheck" 608 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 609 { 610 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 611 PC_IS* pcis = (PC_IS*)(pc->data); 612 IS dirIS = NULL; 613 PetscInt i; 614 PetscErrorCode ierr; 615 616 PetscFunctionBegin; 617 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 618 if (zerodiag) { 619 Mat A; 620 Vec vec3_N; 621 PetscScalar *vals; 622 const PetscInt *idxs; 623 PetscInt nz,*count; 624 625 /* p0 */ 626 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 627 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 628 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 629 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 630 for (i=0;i<nz;i++) vals[i] = 1.; 631 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 632 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 633 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 634 /* v_I */ 635 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 636 for (i=0;i<nz;i++) vals[i] = 0.; 637 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 638 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 639 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 640 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 641 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 643 if (dirIS) { 644 PetscInt n; 645 646 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 647 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 648 for (i=0;i<n;i++) vals[i] = 0.; 649 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 651 } 652 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 653 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 654 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 655 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 656 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 657 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 658 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 659 if (PetscAbsScalar(vals[0]) > 1.e-1) { 660 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 661 } 662 ierr = PetscFree(vals);CHKERRQ(ierr); 663 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 664 665 /* there should not be any pressure dofs lying on the interface */ 666 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 667 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 668 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 669 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 670 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 671 for (i=0;i<nz;i++) { 672 if (count[idxs[i]]) { 673 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 674 } 675 } 676 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 677 ierr = PetscFree(count);CHKERRQ(ierr); 678 } 679 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 680 681 /* check PCBDDCBenignGetOrSetP0 */ 682 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 683 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 684 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 685 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 686 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 687 for (i=0;i<pcbddc->benign_n;i++) { 688 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 689 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 690 } 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 697 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 698 { 699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 700 IS pressures,zerodiag,*zerodiag_subs; 701 PetscInt nz,n; 702 PetscInt *interior_dofs,n_interior_dofs; 703 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 709 for (n=0;n<pcbddc->benign_n;n++) { 710 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 711 } 712 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 713 pcbddc->benign_n = 0; 714 /* if a local info on dofs is present, assumes that the last field represents "pressures" 715 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 716 Checks if all the pressure dofs in each subdomain have a zero diagonal 717 If not, a change of basis on pressures is not needed 718 since the local Schur complements are already SPD 719 */ 720 has_null_pressures = PETSC_TRUE; 721 have_null = PETSC_TRUE; 722 if (pcbddc->n_ISForDofsLocal) { 723 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 724 725 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 726 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 727 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 728 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 730 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 731 if (!sorted) { 732 ierr = ISSort(pressures);CHKERRQ(ierr); 733 } 734 } else { 735 pressures = NULL; 736 } 737 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 738 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 739 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 740 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 741 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 742 if (!sorted) { 743 ierr = ISSort(zerodiag);CHKERRQ(ierr); 744 } 745 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 746 if (!nz) { 747 if (n) have_null = PETSC_FALSE; 748 has_null_pressures = PETSC_FALSE; 749 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 750 } 751 recompute_zerodiag = PETSC_FALSE; 752 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 753 zerodiag_subs = NULL; 754 pcbddc->benign_n = 0; 755 n_interior_dofs = 0; 756 interior_dofs = NULL; 757 if (pcbddc->current_level) { /* need to compute interior nodes */ 758 PetscInt n,i,j; 759 PetscInt n_neigh,*neigh,*n_shared,**shared; 760 PetscInt *iwork; 761 762 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 763 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 764 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 765 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 766 for (i=0;i<n_neigh;i++) 767 for (j=0;j<n_shared[i];j++) 768 iwork[shared[i][j]] += 1; 769 for (i=0;i<n;i++) 770 if (!iwork[i]) 771 interior_dofs[n_interior_dofs++] = i; 772 ierr = PetscFree(iwork);CHKERRQ(ierr); 773 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 774 } 775 if (has_null_pressures) { 776 IS *subs; 777 PetscInt nsubs,i,j,nl; 778 const PetscInt *idxs; 779 PetscScalar *array; 780 Vec *work; 781 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 782 783 subs = pcbddc->local_subs; 784 nsubs = pcbddc->n_local_subs; 785 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 786 if (pcbddc->current_level) { 787 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 788 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 789 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 790 /* work[0] = 1_p */ 791 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 792 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 793 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 794 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 795 /* work[0] = 1_v */ 796 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 797 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 798 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 799 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 800 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 801 } 802 if (nsubs > 1) { 803 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 804 for (i=0;i<nsubs;i++) { 805 ISLocalToGlobalMapping l2g; 806 IS t_zerodiag_subs; 807 PetscInt nl; 808 809 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 810 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 811 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 812 if (nl) { 813 PetscBool valid = PETSC_TRUE; 814 815 if (pcbddc->current_level) { 816 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 817 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 818 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 819 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 820 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 821 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 822 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 823 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 824 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 825 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 826 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 827 for (j=0;j<n_interior_dofs;j++) { 828 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 829 valid = PETSC_FALSE; 830 break; 831 } 832 } 833 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 834 } 835 if (valid && pcbddc->NeumannBoundariesLocal) { 836 IS t_bc; 837 PetscInt nzb; 838 839 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 840 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 841 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 842 if (nzb) valid = PETSC_FALSE; 843 } 844 if (valid && pressures) { 845 IS t_pressure_subs; 846 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 847 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 848 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 849 } 850 if (valid) { 851 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 852 pcbddc->benign_n++; 853 } else { 854 recompute_zerodiag = PETSC_TRUE; 855 } 856 } 857 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 858 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 859 } 860 } else { /* there's just one subdomain (or zero if they have not been detected */ 861 PetscBool valid = PETSC_TRUE; 862 863 if (pcbddc->NeumannBoundariesLocal) { 864 PetscInt nzb; 865 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 866 if (nzb) valid = PETSC_FALSE; 867 } 868 if (valid && pressures) { 869 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 870 } 871 if (valid && pcbddc->current_level) { 872 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 873 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 874 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 875 for (j=0;j<n_interior_dofs;j++) { 876 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 877 valid = PETSC_FALSE; 878 break; 879 } 880 } 881 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 882 } 883 if (valid) { 884 pcbddc->benign_n = 1; 885 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 886 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 887 zerodiag_subs[0] = zerodiag; 888 } 889 } 890 if (pcbddc->current_level) { 891 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 892 } 893 } 894 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 895 896 if (!pcbddc->benign_n) { 897 PetscInt n; 898 899 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 900 recompute_zerodiag = PETSC_FALSE; 901 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 902 if (n) { 903 has_null_pressures = PETSC_FALSE; 904 have_null = PETSC_FALSE; 905 } 906 } 907 908 /* final check for null pressures */ 909 if (zerodiag && pressures) { 910 PetscInt nz,np; 911 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 912 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 913 if (nz != np) have_null = PETSC_FALSE; 914 } 915 916 if (recompute_zerodiag) { 917 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 918 if (pcbddc->benign_n == 1) { 919 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 920 zerodiag = zerodiag_subs[0]; 921 } else { 922 PetscInt i,nzn,*new_idxs; 923 924 nzn = 0; 925 for (i=0;i<pcbddc->benign_n;i++) { 926 PetscInt ns; 927 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 928 nzn += ns; 929 } 930 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 931 nzn = 0; 932 for (i=0;i<pcbddc->benign_n;i++) { 933 PetscInt ns,*idxs; 934 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 935 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 936 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 937 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 938 nzn += ns; 939 } 940 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 941 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 942 } 943 have_null = PETSC_FALSE; 944 } 945 946 /* Prepare matrix to compute no-net-flux */ 947 if (pcbddc->benign_compute_nonetflux && !pcbddc->divudotp) { 948 Mat A; 949 IS isused = NULL; 950 PetscInt n; 951 if (pressures) { 952 isused = pressures; 953 } else { 954 isused = zerodiag; 955 } 956 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 957 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 958 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 959 if (n) { 960 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&pcbddc->divudotp);CHKERRQ(ierr); 961 } else { 962 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&pcbddc->divudotp);CHKERRQ(ierr); 963 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 } 966 } 967 968 /* change of basis and p0 dofs */ 969 if (has_null_pressures) { 970 IS zerodiagc; 971 const PetscInt *idxs,*idxsc; 972 PetscInt i,s,*nnz; 973 974 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 975 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 976 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 977 /* local change of basis for pressures */ 978 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 979 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 980 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 981 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 982 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 983 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 984 for (i=0;i<pcbddc->benign_n;i++) { 985 PetscInt nzs,j; 986 987 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 988 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 989 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 990 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 991 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 992 } 993 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 994 ierr = PetscFree(nnz);CHKERRQ(ierr); 995 /* set identity on velocities */ 996 for (i=0;i<n-nz;i++) { 997 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1000 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1001 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1002 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1003 /* set change on pressures */ 1004 for (s=0;s<pcbddc->benign_n;s++) { 1005 PetscScalar *array; 1006 PetscInt nzs; 1007 1008 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1009 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1010 for (i=0;i<nzs-1;i++) { 1011 PetscScalar vals[2]; 1012 PetscInt cols[2]; 1013 1014 cols[0] = idxs[i]; 1015 cols[1] = idxs[nzs-1]; 1016 vals[0] = 1.; 1017 vals[1] = 1.; 1018 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1019 } 1020 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1021 for (i=0;i<nzs-1;i++) array[i] = -1.; 1022 array[nzs-1] = 1.; 1023 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1024 /* store local idxs for p0 */ 1025 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1026 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1027 ierr = PetscFree(array);CHKERRQ(ierr); 1028 } 1029 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 /* project if needed */ 1032 if (pcbddc->benign_change_explicit) { 1033 Mat M; 1034 1035 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1036 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1037 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1038 ierr = MatDestroy(&M);CHKERRQ(ierr); 1039 } 1040 /* store global idxs for p0 */ 1041 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1042 } 1043 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1044 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1045 1046 /* determines if the coarse solver will be singular or not */ 1047 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1048 /* determines if the problem has subdomains with 0 pressure block */ 1049 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1050 *zerodiaglocal = zerodiag; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1056 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1057 { 1058 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1059 PetscScalar *array; 1060 PetscErrorCode ierr; 1061 1062 PetscFunctionBegin; 1063 if (!pcbddc->benign_sf) { 1064 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1065 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1066 } 1067 if (get) { 1068 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1069 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1070 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1071 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1072 } else { 1073 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1074 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1075 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1076 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1077 } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1083 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1084 { 1085 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 /* TODO: add error checking 1090 - avoid nested pop (or push) calls. 1091 - cannot push before pop. 1092 - cannot call this if pcbddc->local_mat is NULL 1093 */ 1094 if (!pcbddc->benign_n) { 1095 PetscFunctionReturn(0); 1096 } 1097 if (pop) { 1098 if (pcbddc->benign_change_explicit) { 1099 IS is_p0; 1100 MatReuse reuse; 1101 1102 /* extract B_0 */ 1103 reuse = MAT_INITIAL_MATRIX; 1104 if (pcbddc->benign_B0) { 1105 reuse = MAT_REUSE_MATRIX; 1106 } 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1108 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1109 /* remove rows and cols from local problem */ 1110 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1111 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1112 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1113 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1114 } else { 1115 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1116 PetscScalar *vals; 1117 PetscInt i,n,*idxs_ins; 1118 1119 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1120 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1121 if (!pcbddc->benign_B0) { 1122 PetscInt *nnz; 1123 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1124 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1125 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1126 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1127 for (i=0;i<pcbddc->benign_n;i++) { 1128 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1129 nnz[i] = n - nnz[i]; 1130 } 1131 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1132 ierr = PetscFree(nnz);CHKERRQ(ierr); 1133 } 1134 1135 for (i=0;i<pcbddc->benign_n;i++) { 1136 PetscScalar *array; 1137 PetscInt *idxs,j,nz,cum; 1138 1139 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1140 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1141 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1142 for (j=0;j<nz;j++) vals[j] = 1.; 1143 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1144 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1145 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1146 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1147 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1148 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1149 cum = 0; 1150 for (j=0;j<n;j++) { 1151 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1152 vals[cum] = array[j]; 1153 idxs_ins[cum] = j; 1154 cum++; 1155 } 1156 } 1157 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1159 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1160 } 1161 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1162 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1164 } 1165 } else { /* push */ 1166 if (pcbddc->benign_change_explicit) { 1167 PetscInt i; 1168 1169 for (i=0;i<pcbddc->benign_n;i++) { 1170 PetscScalar *B0_vals; 1171 PetscInt *B0_cols,B0_ncol; 1172 1173 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1174 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1175 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1176 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1177 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1178 } 1179 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1180 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1183 } 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1190 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1191 { 1192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1193 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1194 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1195 PetscBLASInt *B_iwork,*B_ifail; 1196 PetscScalar *work,lwork; 1197 PetscScalar *St,*S,*eigv; 1198 PetscScalar *Sarray,*Starray; 1199 PetscReal *eigs,thresh; 1200 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1201 PetscBool allocated_S_St; 1202 #if defined(PETSC_USE_COMPLEX) 1203 PetscReal *rwork; 1204 #endif 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1209 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1210 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1211 1212 if (pcbddc->dbg_flag) { 1213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1216 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1217 } 1218 1219 if (pcbddc->dbg_flag) { 1220 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1221 } 1222 1223 /* max size of subsets */ 1224 mss = 0; 1225 for (i=0;i<sub_schurs->n_subs;i++) { 1226 PetscInt subset_size; 1227 1228 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1229 mss = PetscMax(mss,subset_size); 1230 } 1231 1232 /* min/max and threshold */ 1233 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1234 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1235 nmax = PetscMax(nmin,nmax); 1236 allocated_S_St = PETSC_FALSE; 1237 if (nmin) { 1238 allocated_S_St = PETSC_TRUE; 1239 } 1240 1241 /* allocate lapack workspace */ 1242 cum = cum2 = 0; 1243 maxneigs = 0; 1244 for (i=0;i<sub_schurs->n_subs;i++) { 1245 PetscInt n,subset_size; 1246 1247 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1248 n = PetscMin(subset_size,nmax); 1249 cum += subset_size; 1250 cum2 += subset_size*n; 1251 maxneigs = PetscMax(maxneigs,n); 1252 } 1253 if (mss) { 1254 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1255 PetscBLASInt B_itype = 1; 1256 PetscBLASInt B_N = mss; 1257 PetscReal zero = 0.0; 1258 PetscReal eps = 0.0; /* dlamch? */ 1259 1260 B_lwork = -1; 1261 S = NULL; 1262 St = NULL; 1263 eigs = NULL; 1264 eigv = NULL; 1265 B_iwork = NULL; 1266 B_ifail = NULL; 1267 #if defined(PETSC_USE_COMPLEX) 1268 rwork = NULL; 1269 #endif 1270 thresh = 1.0; 1271 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1272 #if defined(PETSC_USE_COMPLEX) 1273 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1274 #else 1275 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1276 #endif 1277 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1279 } else { 1280 /* TODO */ 1281 } 1282 } else { 1283 lwork = 0; 1284 } 1285 1286 nv = 0; 1287 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1288 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1289 } 1290 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1291 if (allocated_S_St) { 1292 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1293 } 1294 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1295 #if defined(PETSC_USE_COMPLEX) 1296 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1297 #endif 1298 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1299 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1300 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1301 nv+cum,&pcbddc->adaptive_constraints_idxs, 1302 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1303 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1304 1305 maxneigs = 0; 1306 cum = cumarray = 0; 1307 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1308 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1309 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1310 const PetscInt *idxs; 1311 1312 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1313 for (cum=0;cum<nv;cum++) { 1314 pcbddc->adaptive_constraints_n[cum] = 1; 1315 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1316 pcbddc->adaptive_constraints_data[cum] = 1.0; 1317 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1318 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1319 } 1320 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1321 } 1322 1323 if (mss) { /* multilevel */ 1324 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1325 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1326 } 1327 1328 thresh = pcbddc->adaptive_threshold; 1329 for (i=0;i<sub_schurs->n_subs;i++) { 1330 const PetscInt *idxs; 1331 PetscReal upper,lower; 1332 PetscInt j,subset_size,eigs_start = 0; 1333 PetscBLASInt B_N; 1334 PetscBool same_data = PETSC_FALSE; 1335 1336 if (pcbddc->use_deluxe_scaling) { 1337 upper = PETSC_MAX_REAL; 1338 lower = thresh; 1339 } else { 1340 upper = 1./thresh; 1341 lower = 0.; 1342 } 1343 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1344 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1345 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1346 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1347 if (sub_schurs->is_hermitian) { 1348 PetscInt j,k; 1349 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1350 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1351 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1352 } 1353 for (j=0;j<subset_size;j++) { 1354 for (k=j;k<subset_size;k++) { 1355 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1356 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1357 } 1358 } 1359 } else { 1360 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1361 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1362 } 1363 } else { 1364 S = Sarray + cumarray; 1365 St = Starray + cumarray; 1366 } 1367 /* see if we can save some work */ 1368 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1369 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1370 } 1371 1372 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1373 B_neigs = 0; 1374 } else { 1375 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1376 PetscBLASInt B_itype = 1; 1377 PetscBLASInt B_IL, B_IU; 1378 PetscReal eps = -1.0; /* dlamch? */ 1379 PetscInt nmin_s; 1380 PetscBool compute_range = PETSC_FALSE; 1381 1382 if (pcbddc->dbg_flag) { 1383 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1384 } 1385 1386 compute_range = PETSC_FALSE; 1387 if (thresh > 1.+PETSC_SMALL && !same_data) { 1388 compute_range = PETSC_TRUE; 1389 } 1390 1391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1392 if (compute_range) { 1393 1394 /* ask for eigenvalues larger than thresh */ 1395 #if defined(PETSC_USE_COMPLEX) 1396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1397 #else 1398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1399 #endif 1400 } else if (!same_data) { 1401 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1402 B_IL = 1; 1403 #if defined(PETSC_USE_COMPLEX) 1404 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1405 #else 1406 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1407 #endif 1408 } else { /* same_data is true, so get the adaptive function requested by the user */ 1409 PetscInt k; 1410 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1411 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1412 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1413 nmin = nmax; 1414 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1415 for (k=0;k<nmax;k++) { 1416 eigs[k] = 1./PETSC_SMALL; 1417 eigv[k*(subset_size+1)] = 1.0; 1418 } 1419 } 1420 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1421 if (B_ierr) { 1422 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1423 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1424 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1425 } 1426 1427 if (B_neigs > nmax) { 1428 if (pcbddc->dbg_flag) { 1429 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1430 } 1431 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1432 B_neigs = nmax; 1433 } 1434 1435 nmin_s = PetscMin(nmin,B_N); 1436 if (B_neigs < nmin_s) { 1437 PetscBLASInt B_neigs2; 1438 1439 if (pcbddc->use_deluxe_scaling) { 1440 B_IL = B_N - nmin_s + 1; 1441 B_IU = B_N - B_neigs; 1442 } else { 1443 B_IL = B_neigs + 1; 1444 B_IU = nmin_s; 1445 } 1446 if (pcbddc->dbg_flag) { 1447 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1448 } 1449 if (sub_schurs->is_hermitian) { 1450 PetscInt j,k; 1451 for (j=0;j<subset_size;j++) { 1452 for (k=j;k<subset_size;k++) { 1453 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1454 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1455 } 1456 } 1457 } else { 1458 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1459 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1460 } 1461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1462 #if defined(PETSC_USE_COMPLEX) 1463 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1464 #else 1465 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1466 #endif 1467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1468 B_neigs += B_neigs2; 1469 } 1470 if (B_ierr) { 1471 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1472 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1473 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1474 } 1475 if (pcbddc->dbg_flag) { 1476 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1477 for (j=0;j<B_neigs;j++) { 1478 if (eigs[j] == 0.0) { 1479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1480 } else { 1481 if (pcbddc->use_deluxe_scaling) { 1482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1483 } else { 1484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1485 } 1486 } 1487 } 1488 } 1489 } else { 1490 /* TODO */ 1491 } 1492 } 1493 /* change the basis back to the original one */ 1494 if (sub_schurs->change) { 1495 Mat change,phi,phit; 1496 1497 if (pcbddc->dbg_flag > 1) { 1498 PetscInt ii; 1499 for (ii=0;ii<B_neigs;ii++) { 1500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1501 for (j=0;j<B_N;j++) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1503 } 1504 } 1505 } 1506 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1507 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1508 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1509 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1510 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1511 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1512 } 1513 maxneigs = PetscMax(B_neigs,maxneigs); 1514 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1515 if (B_neigs) { 1516 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1517 1518 if (pcbddc->dbg_flag > 1) { 1519 PetscInt ii; 1520 for (ii=0;ii<B_neigs;ii++) { 1521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1522 for (j=0;j<B_N;j++) { 1523 #if defined(PETSC_USE_COMPLEX) 1524 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1525 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1527 #else 1528 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1529 #endif 1530 } 1531 } 1532 } 1533 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1534 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1535 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1536 cum++; 1537 } 1538 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1539 /* shift for next computation */ 1540 cumarray += subset_size*subset_size; 1541 } 1542 if (pcbddc->dbg_flag) { 1543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1544 } 1545 1546 if (mss) { 1547 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1548 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1549 /* destroy matrices (junk) */ 1550 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1551 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1552 } 1553 if (allocated_S_St) { 1554 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1555 } 1556 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1557 #if defined(PETSC_USE_COMPLEX) 1558 ierr = PetscFree(rwork);CHKERRQ(ierr); 1559 #endif 1560 if (pcbddc->dbg_flag) { 1561 PetscInt maxneigs_r; 1562 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "PCBDDCSetUpSolvers" 1570 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1571 { 1572 PetscScalar *coarse_submat_vals; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1577 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1578 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1579 1580 /* Setup local neumann solver ksp_R */ 1581 /* PCBDDCSetUpLocalScatters should be called first! */ 1582 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1583 1584 /* 1585 Setup local correction and local part of coarse basis. 1586 Gives back the dense local part of the coarse matrix in column major ordering 1587 */ 1588 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1589 1590 /* Compute total number of coarse nodes and setup coarse solver */ 1591 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1592 1593 /* free */ 1594 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "PCBDDCResetCustomization" 1600 PetscErrorCode PCBDDCResetCustomization(PC pc) 1601 { 1602 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1603 PetscErrorCode ierr; 1604 1605 PetscFunctionBegin; 1606 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1607 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1608 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1609 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1610 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1611 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1612 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1613 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1614 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1615 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1616 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "PCBDDCResetTopography" 1622 PetscErrorCode PCBDDCResetTopography(PC pc) 1623 { 1624 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1625 PetscInt i; 1626 PetscErrorCode ierr; 1627 1628 PetscFunctionBegin; 1629 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1630 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1631 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1632 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1633 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1634 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1635 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1636 for (i=0;i<pcbddc->n_local_subs;i++) { 1637 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1638 } 1639 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1640 if (pcbddc->sub_schurs) { 1641 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "PCBDDCResetSolvers" 1648 PetscErrorCode PCBDDCResetSolvers(PC pc) 1649 { 1650 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1655 if (pcbddc->coarse_phi_B) { 1656 PetscScalar *array; 1657 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1658 ierr = PetscFree(array);CHKERRQ(ierr); 1659 } 1660 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1661 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1662 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1663 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1664 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1665 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1666 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1667 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1668 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1669 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1670 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1671 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1672 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1673 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1674 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1675 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1676 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1677 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1678 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1679 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1680 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1681 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1683 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1684 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1685 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1686 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1687 if (pcbddc->benign_zerodiag_subs) { 1688 PetscInt i; 1689 for (i=0;i<pcbddc->benign_n;i++) { 1690 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1691 } 1692 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1700 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1701 { 1702 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1703 PC_IS *pcis = (PC_IS*)pc->data; 1704 VecType impVecType; 1705 PetscInt n_constraints,n_R,old_size; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1710 /* get sizes */ 1711 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1712 n_R = pcis->n - pcbddc->n_vertices; 1713 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1714 /* local work vectors (try to avoid unneeded work)*/ 1715 /* R nodes */ 1716 old_size = -1; 1717 if (pcbddc->vec1_R) { 1718 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1719 } 1720 if (n_R != old_size) { 1721 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1722 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1723 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1724 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1725 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1726 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1727 } 1728 /* local primal dofs */ 1729 old_size = -1; 1730 if (pcbddc->vec1_P) { 1731 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1732 } 1733 if (pcbddc->local_primal_size != old_size) { 1734 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1735 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1736 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1737 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1738 } 1739 /* local explicit constraints */ 1740 old_size = -1; 1741 if (pcbddc->vec1_C) { 1742 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1743 } 1744 if (n_constraints && n_constraints != old_size) { 1745 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1746 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1747 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1748 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "PCBDDCSetUpCorrection" 1755 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1756 { 1757 PetscErrorCode ierr; 1758 /* pointers to pcis and pcbddc */ 1759 PC_IS* pcis = (PC_IS*)pc->data; 1760 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1761 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1762 /* submatrices of local problem */ 1763 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1764 /* submatrices of local coarse problem */ 1765 Mat S_VV,S_CV,S_VC,S_CC; 1766 /* working matrices */ 1767 Mat C_CR; 1768 /* additional working stuff */ 1769 PC pc_R; 1770 Mat F; 1771 Vec dummy_vec; 1772 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1773 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1774 PetscScalar *work; 1775 PetscInt *idx_V_B; 1776 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1777 PetscInt i,n_R,n_D,n_B; 1778 1779 /* some shortcuts to scalars */ 1780 PetscScalar one=1.0,m_one=-1.0; 1781 1782 PetscFunctionBegin; 1783 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1784 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1785 } 1786 1787 /* Set Non-overlapping dimensions */ 1788 n_vertices = pcbddc->n_vertices; 1789 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1790 n_B = pcis->n_B; 1791 n_D = pcis->n - n_B; 1792 n_R = pcis->n - n_vertices; 1793 1794 /* vertices in boundary numbering */ 1795 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1796 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1797 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1798 1799 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1800 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1802 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1803 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1804 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1806 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1808 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1809 1810 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1811 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1812 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1813 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1814 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1815 lda_rhs = n_R; 1816 need_benign_correction = PETSC_FALSE; 1817 if (isLU || isILU || isCHOL) { 1818 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1819 } else if (sub_schurs && sub_schurs->reuse_solver) { 1820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1821 MatFactorType type; 1822 1823 F = reuse_solver->F; 1824 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1825 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1826 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1827 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1828 } else { 1829 F = NULL; 1830 } 1831 1832 /* allocate workspace */ 1833 n = 0; 1834 if (n_constraints) { 1835 n += lda_rhs*n_constraints; 1836 } 1837 if (n_vertices) { 1838 n = PetscMax(2*lda_rhs*n_vertices,n); 1839 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1840 } 1841 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1842 1843 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1844 dummy_vec = NULL; 1845 if (need_benign_correction && lda_rhs != n_R && F) { 1846 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1847 } 1848 1849 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1850 if (n_constraints) { 1851 Mat M1,M2,M3,C_B; 1852 IS is_aux; 1853 PetscScalar *array,*array2; 1854 1855 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1856 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1857 1858 /* Extract constraints on R nodes: C_{CR} */ 1859 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1860 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1861 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1862 1863 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1864 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1865 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1866 for (i=0;i<n_constraints;i++) { 1867 const PetscScalar *row_cmat_values; 1868 const PetscInt *row_cmat_indices; 1869 PetscInt size_of_constraint,j; 1870 1871 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1872 for (j=0;j<size_of_constraint;j++) { 1873 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1874 } 1875 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1876 } 1877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1878 if (F) { 1879 Mat B; 1880 1881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 1885 /* rhs is already zero on interior dofs, no need to change the rhs */ 1886 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1887 } 1888 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1889 if (need_benign_correction) { 1890 PetscScalar *marr; 1891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1892 1893 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1894 if (lda_rhs != n_R) { 1895 for (i=0;i<n_constraints;i++) { 1896 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1897 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1898 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1899 } 1900 } else { 1901 for (i=0;i<n_constraints;i++) { 1902 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1903 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1904 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1905 } 1906 } 1907 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1908 } 1909 ierr = MatDestroy(&B);CHKERRQ(ierr); 1910 } else { 1911 PetscScalar *marr; 1912 1913 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1914 for (i=0;i<n_constraints;i++) { 1915 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1916 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1920 } 1921 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1922 } 1923 if (!pcbddc->switch_static) { 1924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1925 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1926 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1927 for (i=0;i<n_constraints;i++) { 1928 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1929 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1930 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1931 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1932 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1934 } 1935 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1936 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1937 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1938 } else { 1939 if (lda_rhs != n_R) { 1940 IS dummy; 1941 1942 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1943 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1944 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1945 } else { 1946 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1947 pcbddc->local_auxmat2 = local_auxmat2_R; 1948 } 1949 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1950 } 1951 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1952 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1953 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1954 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1955 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1956 if (isCHOL) { 1957 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1958 } else { 1959 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1960 } 1961 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1962 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1963 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1964 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1965 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1966 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1967 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1968 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1969 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1970 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1971 } 1972 1973 /* Get submatrices from subdomain matrix */ 1974 if (n_vertices) { 1975 IS is_aux; 1976 1977 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1978 IS tis; 1979 1980 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1981 ierr = ISSort(tis);CHKERRQ(ierr); 1982 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1983 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1984 } else { 1985 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1986 } 1987 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1988 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1989 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1990 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1991 } 1992 1993 /* Matrix of coarse basis functions (local) */ 1994 if (pcbddc->coarse_phi_B) { 1995 PetscInt on_B,on_primal,on_D=n_D; 1996 if (pcbddc->coarse_phi_D) { 1997 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1998 } 1999 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2000 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2001 PetscScalar *marray; 2002 2003 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2004 ierr = PetscFree(marray);CHKERRQ(ierr); 2005 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2006 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2007 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2008 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2009 } 2010 } 2011 2012 if (!pcbddc->coarse_phi_B) { 2013 PetscScalar *marray; 2014 2015 n = n_B*pcbddc->local_primal_size; 2016 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2017 n += n_D*pcbddc->local_primal_size; 2018 } 2019 if (!pcbddc->symmetric_primal) { 2020 n *= 2; 2021 } 2022 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2023 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2024 n = n_B*pcbddc->local_primal_size; 2025 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2027 n += n_D*pcbddc->local_primal_size; 2028 } 2029 if (!pcbddc->symmetric_primal) { 2030 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2031 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2032 n = n_B*pcbddc->local_primal_size; 2033 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2034 } 2035 } else { 2036 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2037 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2038 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2039 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2040 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2041 } 2042 } 2043 } 2044 2045 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2046 p0_lidx_I = NULL; 2047 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2048 const PetscInt *idxs; 2049 2050 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2051 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2052 for (i=0;i<pcbddc->benign_n;i++) { 2053 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2054 } 2055 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2056 } 2057 2058 /* vertices */ 2059 if (n_vertices) { 2060 2061 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2062 2063 if (n_R) { 2064 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2065 PetscBLASInt B_N,B_one = 1; 2066 PetscScalar *x,*y; 2067 PetscBool isseqaij; 2068 2069 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2070 if (need_benign_correction) { 2071 ISLocalToGlobalMapping RtoN; 2072 IS is_p0; 2073 PetscInt *idxs_p0,n; 2074 2075 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2076 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2077 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2078 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2079 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2080 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2081 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2082 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2083 } 2084 2085 if (lda_rhs == n_R) { 2086 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2087 } else { 2088 PetscScalar *av,*array; 2089 const PetscInt *xadj,*adjncy; 2090 PetscInt n; 2091 PetscBool flg_row; 2092 2093 array = work+lda_rhs*n_vertices; 2094 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2095 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2096 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2097 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2098 for (i=0;i<n;i++) { 2099 PetscInt j; 2100 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2101 } 2102 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2103 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2105 } 2106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2107 if (need_benign_correction) { 2108 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2109 PetscScalar *marr; 2110 2111 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2112 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2113 2114 | 0 0 0 | (V) 2115 L = | 0 0 -1 | (P-p0) 2116 | 0 0 -1 | (p0) 2117 2118 */ 2119 for (i=0;i<reuse_solver->benign_n;i++) { 2120 const PetscScalar *vals; 2121 const PetscInt *idxs,*idxs_zero; 2122 PetscInt n,j,nz; 2123 2124 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2125 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2126 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2127 for (j=0;j<n;j++) { 2128 PetscScalar val = vals[j]; 2129 PetscInt k,col = idxs[j]; 2130 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2131 } 2132 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2133 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2134 } 2135 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2136 } 2137 if (F) { 2138 /* need to correct the rhs */ 2139 if (need_benign_correction) { 2140 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2141 PetscScalar *marr; 2142 2143 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2144 if (lda_rhs != n_R) { 2145 for (i=0;i<n_vertices;i++) { 2146 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2147 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2148 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2149 } 2150 } else { 2151 for (i=0;i<n_vertices;i++) { 2152 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2153 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2154 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2155 } 2156 } 2157 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2158 } 2159 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2160 /* need to correct the solution */ 2161 if (need_benign_correction) { 2162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2163 PetscScalar *marr; 2164 2165 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2166 if (lda_rhs != n_R) { 2167 for (i=0;i<n_vertices;i++) { 2168 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2169 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2170 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2171 } 2172 } else { 2173 for (i=0;i<n_vertices;i++) { 2174 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2175 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2177 } 2178 } 2179 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2180 } 2181 } else { 2182 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2183 for (i=0;i<n_vertices;i++) { 2184 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2185 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2186 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2188 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2189 } 2190 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2191 } 2192 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2193 /* S_VV and S_CV */ 2194 if (n_constraints) { 2195 Mat B; 2196 2197 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2198 for (i=0;i<n_vertices;i++) { 2199 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2200 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2201 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2202 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2204 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2205 } 2206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2207 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2208 ierr = MatDestroy(&B);CHKERRQ(ierr); 2209 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2210 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2211 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2212 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2213 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2214 ierr = MatDestroy(&B);CHKERRQ(ierr); 2215 } 2216 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2217 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2218 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2219 } 2220 if (lda_rhs != n_R) { 2221 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2222 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2223 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2224 } 2225 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2226 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2227 if (need_benign_correction) { 2228 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2229 PetscScalar *marr,*sums; 2230 2231 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2232 ierr = MatDenseGetArray(S_VVt,&marr); 2233 for (i=0;i<reuse_solver->benign_n;i++) { 2234 const PetscScalar *vals; 2235 const PetscInt *idxs,*idxs_zero; 2236 PetscInt n,j,nz; 2237 2238 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2239 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2240 for (j=0;j<n_vertices;j++) { 2241 PetscInt k; 2242 sums[j] = 0.; 2243 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2244 } 2245 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2246 for (j=0;j<n;j++) { 2247 PetscScalar val = vals[j]; 2248 PetscInt k; 2249 for (k=0;k<n_vertices;k++) { 2250 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2251 } 2252 } 2253 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2254 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2255 } 2256 ierr = PetscFree(sums);CHKERRQ(ierr); 2257 ierr = MatDenseRestoreArray(S_VVt,&marr); 2258 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2259 } 2260 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2261 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2262 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2263 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2264 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2265 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2266 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2267 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2268 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2269 } else { 2270 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2271 } 2272 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2273 2274 /* coarse basis functions */ 2275 for (i=0;i<n_vertices;i++) { 2276 PetscScalar *y; 2277 2278 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2279 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2280 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2281 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2282 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2283 y[n_B*i+idx_V_B[i]] = 1.0; 2284 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2285 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2286 2287 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2288 PetscInt j; 2289 2290 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2291 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2292 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2293 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2294 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2295 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2296 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2297 } 2298 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2299 } 2300 /* if n_R == 0 the object is not destroyed */ 2301 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2302 } 2303 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2304 2305 if (n_constraints) { 2306 Mat B; 2307 2308 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2309 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2310 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2311 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2312 if (n_vertices) { 2313 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2314 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2315 } else { 2316 Mat S_VCt; 2317 2318 if (lda_rhs != n_R) { 2319 ierr = MatDestroy(&B);CHKERRQ(ierr); 2320 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2321 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2322 } 2323 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2324 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2325 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2326 } 2327 } 2328 ierr = MatDestroy(&B);CHKERRQ(ierr); 2329 /* coarse basis functions */ 2330 for (i=0;i<n_constraints;i++) { 2331 PetscScalar *y; 2332 2333 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2334 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2335 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2336 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2338 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2339 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2340 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2341 PetscInt j; 2342 2343 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2344 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2345 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2346 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2347 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2348 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2349 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2350 } 2351 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2352 } 2353 } 2354 if (n_constraints) { 2355 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2356 } 2357 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2358 2359 /* coarse matrix entries relative to B_0 */ 2360 if (pcbddc->benign_n) { 2361 Mat B0_B,B0_BPHI; 2362 IS is_dummy; 2363 PetscScalar *data; 2364 PetscInt j; 2365 2366 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2367 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2368 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2369 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2370 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2371 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2372 for (j=0;j<pcbddc->benign_n;j++) { 2373 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2374 for (i=0;i<pcbddc->local_primal_size;i++) { 2375 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2376 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2377 } 2378 } 2379 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2380 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2381 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2382 } 2383 2384 /* compute other basis functions for non-symmetric problems */ 2385 if (!pcbddc->symmetric_primal) { 2386 Mat B_V=NULL,B_C=NULL; 2387 PetscScalar *marray; 2388 2389 if (n_constraints) { 2390 Mat S_CCT,C_CRT; 2391 2392 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2393 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2394 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2395 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2396 if (n_vertices) { 2397 Mat S_VCT; 2398 2399 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2400 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2401 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2402 } 2403 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2404 } else { 2405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2406 } 2407 if (n_vertices && n_R) { 2408 PetscScalar *av,*marray; 2409 const PetscInt *xadj,*adjncy; 2410 PetscInt n; 2411 PetscBool flg_row; 2412 2413 /* B_V = B_V - A_VR^T */ 2414 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2415 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2416 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2417 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2418 for (i=0;i<n;i++) { 2419 PetscInt j; 2420 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2421 } 2422 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2423 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2424 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2425 } 2426 2427 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2428 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2429 for (i=0;i<n_vertices;i++) { 2430 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2431 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2432 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2433 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2434 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2435 } 2436 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2437 if (B_C) { 2438 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2439 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2440 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2441 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2442 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2443 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2444 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2445 } 2446 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2447 } 2448 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2449 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2450 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2451 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2452 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2453 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2454 } 2455 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2456 /* coarse basis functions */ 2457 for (i=0;i<pcbddc->local_primal_size;i++) { 2458 PetscScalar *y; 2459 2460 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2461 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2462 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2463 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2464 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2465 if (i<n_vertices) { 2466 y[n_B*i+idx_V_B[i]] = 1.0; 2467 } 2468 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2469 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2470 2471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2472 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2473 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2474 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2475 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2476 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2477 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2478 } 2479 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2480 } 2481 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2482 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2483 } 2484 /* free memory */ 2485 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2486 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2487 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2488 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2489 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2490 ierr = PetscFree(work);CHKERRQ(ierr); 2491 if (n_vertices) { 2492 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2493 } 2494 if (n_constraints) { 2495 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2496 } 2497 /* Checking coarse_sub_mat and coarse basis functios */ 2498 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2499 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2500 if (pcbddc->dbg_flag) { 2501 Mat coarse_sub_mat; 2502 Mat AUXMAT,TM1,TM2,TM3,TM4; 2503 Mat coarse_phi_D,coarse_phi_B; 2504 Mat coarse_psi_D,coarse_psi_B; 2505 Mat A_II,A_BB,A_IB,A_BI; 2506 Mat C_B,CPHI; 2507 IS is_dummy; 2508 Vec mones; 2509 MatType checkmattype=MATSEQAIJ; 2510 PetscReal real_value; 2511 2512 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2513 Mat A; 2514 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2515 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2516 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2517 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2518 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2519 ierr = MatDestroy(&A);CHKERRQ(ierr); 2520 } else { 2521 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2522 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2523 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2524 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2525 } 2526 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2527 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2528 if (!pcbddc->symmetric_primal) { 2529 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2530 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2531 } 2532 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2533 2534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2537 if (!pcbddc->symmetric_primal) { 2538 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2539 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2540 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2541 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2542 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2543 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2544 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2545 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2546 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2547 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2548 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2549 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2550 } else { 2551 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2552 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2553 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2554 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2555 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2556 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2557 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2558 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2559 } 2560 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2561 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2562 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2563 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2564 if (pcbddc->benign_n) { 2565 Mat B0_B,B0_BPHI; 2566 PetscScalar *data,*data2; 2567 PetscInt j; 2568 2569 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2570 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2571 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2572 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2573 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2574 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2575 for (j=0;j<pcbddc->benign_n;j++) { 2576 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2577 for (i=0;i<pcbddc->local_primal_size;i++) { 2578 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2579 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2580 } 2581 } 2582 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2583 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2584 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2585 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2586 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2587 } 2588 #if 0 2589 { 2590 PetscViewer viewer; 2591 char filename[256]; 2592 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2593 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2594 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2595 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2596 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2597 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2598 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2599 if (save_change) { 2600 Mat phi_B; 2601 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2602 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2603 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2604 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2605 } else { 2606 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2607 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2608 } 2609 if (pcbddc->coarse_phi_D) { 2610 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2611 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2612 } 2613 if (pcbddc->coarse_psi_B) { 2614 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2615 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2616 } 2617 if (pcbddc->coarse_psi_D) { 2618 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2619 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2620 } 2621 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2622 } 2623 #endif 2624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2625 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2626 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2628 2629 /* check constraints */ 2630 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2631 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2632 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2633 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2634 } else { 2635 PetscScalar *data; 2636 Mat tmat; 2637 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2639 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2640 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2641 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2642 } 2643 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2644 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2645 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2646 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2648 if (!pcbddc->symmetric_primal) { 2649 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2650 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2651 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2652 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2654 } 2655 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2656 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2657 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2658 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2660 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2661 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2662 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2663 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2664 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2665 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2666 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2667 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2668 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2669 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2670 if (!pcbddc->symmetric_primal) { 2671 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2672 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2673 } 2674 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2675 } 2676 /* get back data */ 2677 *coarse_submat_vals_n = coarse_submat_vals; 2678 PetscFunctionReturn(0); 2679 } 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2683 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2684 { 2685 Mat *work_mat; 2686 IS isrow_s,iscol_s; 2687 PetscBool rsorted,csorted; 2688 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2689 PetscErrorCode ierr; 2690 2691 PetscFunctionBegin; 2692 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2693 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2694 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2695 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2696 2697 if (!rsorted) { 2698 const PetscInt *idxs; 2699 PetscInt *idxs_sorted,i; 2700 2701 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2702 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2703 for (i=0;i<rsize;i++) { 2704 idxs_perm_r[i] = i; 2705 } 2706 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2707 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2708 for (i=0;i<rsize;i++) { 2709 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2710 } 2711 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2712 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2713 } else { 2714 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2715 isrow_s = isrow; 2716 } 2717 2718 if (!csorted) { 2719 if (isrow == iscol) { 2720 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2721 iscol_s = isrow_s; 2722 } else { 2723 const PetscInt *idxs; 2724 PetscInt *idxs_sorted,i; 2725 2726 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2727 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2728 for (i=0;i<csize;i++) { 2729 idxs_perm_c[i] = i; 2730 } 2731 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2732 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2733 for (i=0;i<csize;i++) { 2734 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2735 } 2736 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2738 } 2739 } else { 2740 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2741 iscol_s = iscol; 2742 } 2743 2744 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2745 2746 if (!rsorted || !csorted) { 2747 Mat new_mat; 2748 IS is_perm_r,is_perm_c; 2749 2750 if (!rsorted) { 2751 PetscInt *idxs_r,i; 2752 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2753 for (i=0;i<rsize;i++) { 2754 idxs_r[idxs_perm_r[i]] = i; 2755 } 2756 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2757 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2758 } else { 2759 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2760 } 2761 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2762 2763 if (!csorted) { 2764 if (isrow_s == iscol_s) { 2765 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2766 is_perm_c = is_perm_r; 2767 } else { 2768 PetscInt *idxs_c,i; 2769 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2770 for (i=0;i<csize;i++) { 2771 idxs_c[idxs_perm_c[i]] = i; 2772 } 2773 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2774 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2775 } 2776 } else { 2777 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2778 } 2779 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2780 2781 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2782 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2783 work_mat[0] = new_mat; 2784 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2785 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2786 } 2787 2788 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2789 *B = work_mat[0]; 2790 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2791 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2792 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2798 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2799 { 2800 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2802 Mat new_mat; 2803 IS is_local,is_global; 2804 PetscInt local_size; 2805 PetscBool isseqaij; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2810 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2811 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2812 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2813 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2814 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2815 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2816 2817 /* check */ 2818 if (pcbddc->dbg_flag) { 2819 Vec x,x_change; 2820 PetscReal error; 2821 2822 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2823 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2824 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2825 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2826 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2827 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2828 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2831 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2832 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2833 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2834 ierr = VecDestroy(&x);CHKERRQ(ierr); 2835 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2836 } 2837 2838 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2839 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2840 if (isseqaij) { 2841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2842 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2843 } else { 2844 Mat work_mat; 2845 2846 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2847 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2848 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2849 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2850 } 2851 if (matis->A->symmetric_set) { 2852 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2853 #if !defined(PETSC_USE_COMPLEX) 2854 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2855 #endif 2856 } 2857 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2863 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2864 { 2865 PC_IS* pcis = (PC_IS*)(pc->data); 2866 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2867 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2868 PetscInt *idx_R_local=NULL; 2869 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2870 PetscInt vbs,bs; 2871 PetscBT bitmask=NULL; 2872 PetscErrorCode ierr; 2873 2874 PetscFunctionBegin; 2875 /* 2876 No need to setup local scatters if 2877 - primal space is unchanged 2878 AND 2879 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2880 AND 2881 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2882 */ 2883 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2884 PetscFunctionReturn(0); 2885 } 2886 /* destroy old objects */ 2887 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2888 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2889 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2890 /* Set Non-overlapping dimensions */ 2891 n_B = pcis->n_B; 2892 n_D = pcis->n - n_B; 2893 n_vertices = pcbddc->n_vertices; 2894 2895 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2896 2897 /* create auxiliary bitmask and allocate workspace */ 2898 if (!sub_schurs || !sub_schurs->reuse_solver) { 2899 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2900 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2901 for (i=0;i<n_vertices;i++) { 2902 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2903 } 2904 2905 for (i=0, n_R=0; i<pcis->n; i++) { 2906 if (!PetscBTLookup(bitmask,i)) { 2907 idx_R_local[n_R++] = i; 2908 } 2909 } 2910 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2911 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2912 2913 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2914 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2915 } 2916 2917 /* Block code */ 2918 vbs = 1; 2919 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2920 if (bs>1 && !(n_vertices%bs)) { 2921 PetscBool is_blocked = PETSC_TRUE; 2922 PetscInt *vary; 2923 if (!sub_schurs || !sub_schurs->reuse_solver) { 2924 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2925 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2926 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2927 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2928 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2929 for (i=0; i<pcis->n/bs; i++) { 2930 if (vary[i]!=0 && vary[i]!=bs) { 2931 is_blocked = PETSC_FALSE; 2932 break; 2933 } 2934 } 2935 ierr = PetscFree(vary);CHKERRQ(ierr); 2936 } else { 2937 /* Verify directly the R set */ 2938 for (i=0; i<n_R/bs; i++) { 2939 PetscInt j,node=idx_R_local[bs*i]; 2940 for (j=1; j<bs; j++) { 2941 if (node != idx_R_local[bs*i+j]-j) { 2942 is_blocked = PETSC_FALSE; 2943 break; 2944 } 2945 } 2946 } 2947 } 2948 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2949 vbs = bs; 2950 for (i=0;i<n_R/vbs;i++) { 2951 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2952 } 2953 } 2954 } 2955 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2956 if (sub_schurs && sub_schurs->reuse_solver) { 2957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2958 2959 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2960 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2961 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2962 reuse_solver->is_R = pcbddc->is_R_local; 2963 } else { 2964 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2965 } 2966 2967 /* print some info if requested */ 2968 if (pcbddc->dbg_flag) { 2969 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2971 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2976 } 2977 2978 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2979 if (!sub_schurs || !sub_schurs->reuse_solver) { 2980 IS is_aux1,is_aux2; 2981 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2982 2983 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2984 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2985 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2986 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2987 for (i=0; i<n_D; i++) { 2988 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2989 } 2990 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2991 for (i=0, j=0; i<n_R; i++) { 2992 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2993 aux_array1[j++] = i; 2994 } 2995 } 2996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2997 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2998 for (i=0, j=0; i<n_B; i++) { 2999 if (!PetscBTLookup(bitmask,is_indices[i])) { 3000 aux_array2[j++] = i; 3001 } 3002 } 3003 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3004 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3005 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3006 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3007 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3008 3009 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3010 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3011 for (i=0, j=0; i<n_R; i++) { 3012 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3013 aux_array1[j++] = i; 3014 } 3015 } 3016 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3017 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3018 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3019 } 3020 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3021 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3022 } else { 3023 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3024 IS tis; 3025 PetscInt schur_size; 3026 3027 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3028 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3029 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3030 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3031 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3032 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3033 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3034 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3035 } 3036 } 3037 PetscFunctionReturn(0); 3038 } 3039 3040 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3043 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3044 { 3045 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3046 PC_IS *pcis = (PC_IS*)pc->data; 3047 PC pc_temp; 3048 Mat A_RR; 3049 MatReuse reuse; 3050 PetscScalar m_one = -1.0; 3051 PetscReal value; 3052 PetscInt n_D,n_R; 3053 PetscBool check_corr[2],issbaij; 3054 PetscErrorCode ierr; 3055 /* prefixes stuff */ 3056 char dir_prefix[256],neu_prefix[256],str_level[16]; 3057 size_t len; 3058 3059 PetscFunctionBegin; 3060 3061 /* compute prefixes */ 3062 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3063 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3064 if (!pcbddc->current_level) { 3065 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3066 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3067 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3068 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3069 } else { 3070 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3071 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3072 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3073 len -= 15; /* remove "pc_bddc_coarse_" */ 3074 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3075 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3076 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3077 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3078 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3079 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3080 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3081 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3082 } 3083 3084 /* DIRICHLET PROBLEM */ 3085 if (dirichlet) { 3086 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3087 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3088 if (!sub_schurs || !sub_schurs->reuse_solver) { 3089 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3090 } 3091 if (pcbddc->dbg_flag) { 3092 Mat A_IIn; 3093 3094 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3095 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3096 pcis->A_II = A_IIn; 3097 } 3098 } 3099 if (pcbddc->local_mat->symmetric_set) { 3100 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3101 } 3102 /* Matrix for Dirichlet problem is pcis->A_II */ 3103 n_D = pcis->n - pcis->n_B; 3104 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3105 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3106 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3107 /* default */ 3108 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3109 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3110 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3112 if (issbaij) { 3113 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3114 } else { 3115 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3116 } 3117 /* Allow user's customization */ 3118 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3119 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3120 } 3121 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3122 if (sub_schurs && sub_schurs->reuse_solver) { 3123 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3124 3125 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3126 } 3127 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3128 if (!n_D) { 3129 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3130 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3131 } 3132 /* Set Up KSP for Dirichlet problem of BDDC */ 3133 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3134 /* set ksp_D into pcis data */ 3135 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3136 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3137 pcis->ksp_D = pcbddc->ksp_D; 3138 } 3139 3140 /* NEUMANN PROBLEM */ 3141 A_RR = 0; 3142 if (neumann) { 3143 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3144 PetscInt ibs,mbs; 3145 PetscBool issbaij; 3146 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3147 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3148 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3149 if (pcbddc->ksp_R) { /* already created ksp */ 3150 PetscInt nn_R; 3151 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3152 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3153 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3154 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3155 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3156 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3157 reuse = MAT_INITIAL_MATRIX; 3158 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3159 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3160 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3161 reuse = MAT_INITIAL_MATRIX; 3162 } else { /* safe to reuse the matrix */ 3163 reuse = MAT_REUSE_MATRIX; 3164 } 3165 } 3166 /* last check */ 3167 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3168 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3169 reuse = MAT_INITIAL_MATRIX; 3170 } 3171 } else { /* first time, so we need to create the matrix */ 3172 reuse = MAT_INITIAL_MATRIX; 3173 } 3174 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3175 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3176 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3177 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3178 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3179 if (matis->A == pcbddc->local_mat) { 3180 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3181 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3182 } else { 3183 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3184 } 3185 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3186 if (matis->A == pcbddc->local_mat) { 3187 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3188 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3189 } else { 3190 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3191 } 3192 } 3193 /* extract A_RR */ 3194 if (sub_schurs && sub_schurs->reuse_solver) { 3195 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3196 3197 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3198 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3199 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3200 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3201 } else { 3202 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3203 } 3204 } else { 3205 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3206 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3207 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3208 } 3209 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3210 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3211 } 3212 if (pcbddc->local_mat->symmetric_set) { 3213 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3214 } 3215 if (!pcbddc->ksp_R) { /* create object if not present */ 3216 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3217 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3218 /* default */ 3219 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3220 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3221 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3222 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3223 if (issbaij) { 3224 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3225 } else { 3226 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3227 } 3228 /* Allow user's customization */ 3229 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3230 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3231 } 3232 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3233 if (!n_R) { 3234 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3235 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3236 } 3237 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3238 /* Reuse solver if it is present */ 3239 if (sub_schurs && sub_schurs->reuse_solver) { 3240 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3241 3242 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3243 } 3244 /* Set Up KSP for Neumann problem of BDDC */ 3245 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3246 } 3247 3248 if (pcbddc->dbg_flag) { 3249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3250 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3252 } 3253 3254 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3255 check_corr[0] = check_corr[1] = PETSC_FALSE; 3256 if (pcbddc->NullSpace_corr[0]) { 3257 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3258 } 3259 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3260 check_corr[0] = PETSC_TRUE; 3261 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3262 } 3263 if (neumann && pcbddc->NullSpace_corr[2]) { 3264 check_corr[1] = PETSC_TRUE; 3265 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3266 } 3267 3268 /* check Dirichlet and Neumann solvers */ 3269 if (pcbddc->dbg_flag) { 3270 if (dirichlet) { /* Dirichlet */ 3271 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3272 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3273 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3274 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3275 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3277 if (check_corr[0]) { 3278 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3279 } 3280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3281 } 3282 if (neumann) { /* Neumann */ 3283 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3284 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3285 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3286 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3287 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3289 if (check_corr[1]) { 3290 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3291 } 3292 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3293 } 3294 } 3295 /* free Neumann problem's matrix */ 3296 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3302 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3303 { 3304 PetscErrorCode ierr; 3305 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3306 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3307 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3308 3309 PetscFunctionBegin; 3310 if (!reuse_solver) { 3311 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3312 } 3313 if (!pcbddc->switch_static) { 3314 if (applytranspose && pcbddc->local_auxmat1) { 3315 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3316 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3317 } 3318 if (!reuse_solver) { 3319 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3320 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3321 } else { 3322 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3323 3324 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3325 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3326 } 3327 } else { 3328 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3329 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3330 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3331 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3332 if (applytranspose && pcbddc->local_auxmat1) { 3333 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3334 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3335 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3336 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3337 } 3338 } 3339 if (!reuse_solver || pcbddc->switch_static) { 3340 if (applytranspose) { 3341 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3342 } else { 3343 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3344 } 3345 } else { 3346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3347 3348 if (applytranspose) { 3349 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3350 } else { 3351 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3352 } 3353 } 3354 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3355 if (!pcbddc->switch_static) { 3356 if (!reuse_solver) { 3357 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 } else { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 3362 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3363 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 } 3365 if (!applytranspose && pcbddc->local_auxmat1) { 3366 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3367 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3368 } 3369 } else { 3370 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3371 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3372 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3374 if (!applytranspose && pcbddc->local_auxmat1) { 3375 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3376 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3377 } 3378 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3379 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3380 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3381 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3382 } 3383 PetscFunctionReturn(0); 3384 } 3385 3386 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3389 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3390 { 3391 PetscErrorCode ierr; 3392 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3393 PC_IS* pcis = (PC_IS*) (pc->data); 3394 const PetscScalar zero = 0.0; 3395 3396 PetscFunctionBegin; 3397 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3398 if (!pcbddc->benign_apply_coarse_only) { 3399 if (applytranspose) { 3400 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3401 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3402 } else { 3403 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3404 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3405 } 3406 } else { 3407 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3408 } 3409 3410 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3411 if (pcbddc->benign_n) { 3412 PetscScalar *array; 3413 PetscInt j; 3414 3415 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3416 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3417 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3418 } 3419 3420 /* start communications from local primal nodes to rhs of coarse solver */ 3421 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3422 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3424 3425 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3426 if (pcbddc->coarse_ksp) { 3427 Mat coarse_mat; 3428 Vec rhs,sol; 3429 MatNullSpace nullsp; 3430 PetscBool isbddc = PETSC_FALSE; 3431 3432 if (pcbddc->benign_have_null) { 3433 PC coarse_pc; 3434 3435 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3436 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3437 /* we need to propagate to coarser levels the need for a possible benign correction */ 3438 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3439 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3440 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3441 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3442 } 3443 } 3444 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3445 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3446 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3447 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3448 if (nullsp) { 3449 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3450 } 3451 if (applytranspose) { 3452 if (pcbddc->benign_apply_coarse_only) { 3453 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3454 } else { 3455 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3456 } 3457 } else { 3458 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3459 PC coarse_pc; 3460 3461 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3462 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3463 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3464 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3465 } else { 3466 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3467 } 3468 } 3469 /* we don't need the benign correction at coarser levels anymore */ 3470 if (pcbddc->benign_have_null && isbddc) { 3471 PC coarse_pc; 3472 PC_BDDC* coarsepcbddc; 3473 3474 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3475 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3476 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3477 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3478 } 3479 if (nullsp) { 3480 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3481 } 3482 } 3483 3484 /* Local solution on R nodes */ 3485 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3486 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3487 } 3488 /* communications from coarse sol to local primal nodes */ 3489 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3490 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3491 3492 /* Sum contributions from the two levels */ 3493 if (!pcbddc->benign_apply_coarse_only) { 3494 if (applytranspose) { 3495 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3496 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3497 } else { 3498 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3499 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3500 } 3501 /* store p0 */ 3502 if (pcbddc->benign_n) { 3503 PetscScalar *array; 3504 PetscInt j; 3505 3506 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3507 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3508 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3509 } 3510 } else { /* expand the coarse solution */ 3511 if (applytranspose) { 3512 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3513 } else { 3514 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3515 } 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3522 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3523 { 3524 PetscErrorCode ierr; 3525 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3526 PetscScalar *array; 3527 Vec from,to; 3528 3529 PetscFunctionBegin; 3530 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3531 from = pcbddc->coarse_vec; 3532 to = pcbddc->vec1_P; 3533 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3534 Vec tvec; 3535 3536 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3537 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3538 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3539 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3540 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3541 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3542 } 3543 } else { /* from local to global -> put data in coarse right hand side */ 3544 from = pcbddc->vec1_P; 3545 to = pcbddc->coarse_vec; 3546 } 3547 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3548 PetscFunctionReturn(0); 3549 } 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3553 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3554 { 3555 PetscErrorCode ierr; 3556 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3557 PetscScalar *array; 3558 Vec from,to; 3559 3560 PetscFunctionBegin; 3561 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3562 from = pcbddc->coarse_vec; 3563 to = pcbddc->vec1_P; 3564 } else { /* from local to global -> put data in coarse right hand side */ 3565 from = pcbddc->vec1_P; 3566 to = pcbddc->coarse_vec; 3567 } 3568 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3569 if (smode == SCATTER_FORWARD) { 3570 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3571 Vec tvec; 3572 3573 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3574 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3575 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3576 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3577 } 3578 } else { 3579 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3580 ierr = VecResetArray(from);CHKERRQ(ierr); 3581 } 3582 } 3583 PetscFunctionReturn(0); 3584 } 3585 3586 /* uncomment for testing purposes */ 3587 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3588 #undef __FUNCT__ 3589 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3590 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3591 { 3592 PetscErrorCode ierr; 3593 PC_IS* pcis = (PC_IS*)(pc->data); 3594 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3595 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3596 /* one and zero */ 3597 PetscScalar one=1.0,zero=0.0; 3598 /* space to store constraints and their local indices */ 3599 PetscScalar *constraints_data; 3600 PetscInt *constraints_idxs,*constraints_idxs_B; 3601 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3602 PetscInt *constraints_n; 3603 /* iterators */ 3604 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3605 /* BLAS integers */ 3606 PetscBLASInt lwork,lierr; 3607 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3608 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3609 /* reuse */ 3610 PetscInt olocal_primal_size,olocal_primal_size_cc; 3611 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3612 /* change of basis */ 3613 PetscBool qr_needed; 3614 PetscBT change_basis,qr_needed_idx; 3615 /* auxiliary stuff */ 3616 PetscInt *nnz,*is_indices; 3617 PetscInt ncc; 3618 /* some quantities */ 3619 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3620 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3621 3622 PetscFunctionBegin; 3623 /* Destroy Mat objects computed previously */ 3624 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3625 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3626 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3627 /* save info on constraints from previous setup (if any) */ 3628 olocal_primal_size = pcbddc->local_primal_size; 3629 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3630 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3631 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3632 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3633 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3634 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3635 3636 if (!pcbddc->adaptive_selection) { 3637 IS ISForVertices,*ISForFaces,*ISForEdges; 3638 MatNullSpace nearnullsp; 3639 const Vec *nearnullvecs; 3640 Vec *localnearnullsp; 3641 PetscScalar *array; 3642 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3643 PetscBool nnsp_has_cnst; 3644 /* LAPACK working arrays for SVD or POD */ 3645 PetscBool skip_lapack,boolforchange; 3646 PetscScalar *work; 3647 PetscReal *singular_vals; 3648 #if defined(PETSC_USE_COMPLEX) 3649 PetscReal *rwork; 3650 #endif 3651 #if defined(PETSC_MISSING_LAPACK_GESVD) 3652 PetscScalar *temp_basis,*correlation_mat; 3653 #else 3654 PetscBLASInt dummy_int=1; 3655 PetscScalar dummy_scalar=1.; 3656 #endif 3657 3658 /* Get index sets for faces, edges and vertices from graph */ 3659 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3660 /* print some info */ 3661 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3662 PetscInt nv; 3663 3664 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3665 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3666 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3672 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3673 } 3674 3675 /* free unneeded index sets */ 3676 if (!pcbddc->use_vertices) { 3677 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3678 } 3679 if (!pcbddc->use_edges) { 3680 for (i=0;i<n_ISForEdges;i++) { 3681 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3682 } 3683 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3684 n_ISForEdges = 0; 3685 } 3686 if (!pcbddc->use_faces) { 3687 for (i=0;i<n_ISForFaces;i++) { 3688 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3689 } 3690 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3691 n_ISForFaces = 0; 3692 } 3693 3694 /* check if near null space is attached to global mat */ 3695 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3696 if (nearnullsp) { 3697 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3698 /* remove any stored info */ 3699 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3700 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3701 /* store information for BDDC solver reuse */ 3702 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3703 pcbddc->onearnullspace = nearnullsp; 3704 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3705 for (i=0;i<nnsp_size;i++) { 3706 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3707 } 3708 } else { /* if near null space is not provided BDDC uses constants by default */ 3709 nnsp_size = 0; 3710 nnsp_has_cnst = PETSC_TRUE; 3711 } 3712 /* get max number of constraints on a single cc */ 3713 max_constraints = nnsp_size; 3714 if (nnsp_has_cnst) max_constraints++; 3715 3716 /* 3717 Evaluate maximum storage size needed by the procedure 3718 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3719 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3720 There can be multiple constraints per connected component 3721 */ 3722 n_vertices = 0; 3723 if (ISForVertices) { 3724 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3725 } 3726 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3727 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3728 3729 total_counts = n_ISForFaces+n_ISForEdges; 3730 total_counts *= max_constraints; 3731 total_counts += n_vertices; 3732 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3733 3734 total_counts = 0; 3735 max_size_of_constraint = 0; 3736 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3737 IS used_is; 3738 if (i<n_ISForEdges) { 3739 used_is = ISForEdges[i]; 3740 } else { 3741 used_is = ISForFaces[i-n_ISForEdges]; 3742 } 3743 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3744 total_counts += j; 3745 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3746 } 3747 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3748 3749 /* get local part of global near null space vectors */ 3750 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3751 for (k=0;k<nnsp_size;k++) { 3752 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3753 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3754 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3755 } 3756 3757 /* whether or not to skip lapack calls */ 3758 skip_lapack = PETSC_TRUE; 3759 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3760 3761 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3762 if (!skip_lapack) { 3763 PetscScalar temp_work; 3764 3765 #if defined(PETSC_MISSING_LAPACK_GESVD) 3766 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3767 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3768 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3769 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3770 #if defined(PETSC_USE_COMPLEX) 3771 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3772 #endif 3773 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3774 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3775 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3776 lwork = -1; 3777 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3778 #if !defined(PETSC_USE_COMPLEX) 3779 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3780 #else 3781 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3782 #endif 3783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3785 #else /* on missing GESVD */ 3786 /* SVD */ 3787 PetscInt max_n,min_n; 3788 max_n = max_size_of_constraint; 3789 min_n = max_constraints; 3790 if (max_size_of_constraint < max_constraints) { 3791 min_n = max_size_of_constraint; 3792 max_n = max_constraints; 3793 } 3794 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3795 #if defined(PETSC_USE_COMPLEX) 3796 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3797 #endif 3798 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3799 lwork = -1; 3800 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3801 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3802 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3804 #if !defined(PETSC_USE_COMPLEX) 3805 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3806 #else 3807 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3808 #endif 3809 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3811 #endif /* on missing GESVD */ 3812 /* Allocate optimal workspace */ 3813 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3814 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3815 } 3816 /* Now we can loop on constraining sets */ 3817 total_counts = 0; 3818 constraints_idxs_ptr[0] = 0; 3819 constraints_data_ptr[0] = 0; 3820 /* vertices */ 3821 if (n_vertices) { 3822 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3823 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3824 for (i=0;i<n_vertices;i++) { 3825 constraints_n[total_counts] = 1; 3826 constraints_data[total_counts] = 1.0; 3827 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3828 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3829 total_counts++; 3830 } 3831 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3832 n_vertices = total_counts; 3833 } 3834 3835 /* edges and faces */ 3836 total_counts_cc = total_counts; 3837 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3838 IS used_is; 3839 PetscBool idxs_copied = PETSC_FALSE; 3840 3841 if (ncc<n_ISForEdges) { 3842 used_is = ISForEdges[ncc]; 3843 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3844 } else { 3845 used_is = ISForFaces[ncc-n_ISForEdges]; 3846 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3847 } 3848 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3849 3850 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3851 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3852 /* change of basis should not be performed on local periodic nodes */ 3853 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3854 if (nnsp_has_cnst) { 3855 PetscScalar quad_value; 3856 3857 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3858 idxs_copied = PETSC_TRUE; 3859 3860 if (!pcbddc->use_nnsp_true) { 3861 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3862 } else { 3863 quad_value = 1.0; 3864 } 3865 for (j=0;j<size_of_constraint;j++) { 3866 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3867 } 3868 temp_constraints++; 3869 total_counts++; 3870 } 3871 for (k=0;k<nnsp_size;k++) { 3872 PetscReal real_value; 3873 PetscScalar *ptr_to_data; 3874 3875 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3876 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3877 for (j=0;j<size_of_constraint;j++) { 3878 ptr_to_data[j] = array[is_indices[j]]; 3879 } 3880 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3881 /* check if array is null on the connected component */ 3882 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3883 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3884 if (real_value > 0.0) { /* keep indices and values */ 3885 temp_constraints++; 3886 total_counts++; 3887 if (!idxs_copied) { 3888 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3889 idxs_copied = PETSC_TRUE; 3890 } 3891 } 3892 } 3893 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3894 valid_constraints = temp_constraints; 3895 if (!pcbddc->use_nnsp_true && temp_constraints) { 3896 if (temp_constraints == 1) { /* just normalize the constraint */ 3897 PetscScalar norm,*ptr_to_data; 3898 3899 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3901 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3902 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3903 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3904 } else { /* perform SVD */ 3905 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3906 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3907 3908 #if defined(PETSC_MISSING_LAPACK_GESVD) 3909 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3910 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3911 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3912 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3913 from that computed using LAPACKgesvd 3914 -> This is due to a different computation of eigenvectors in LAPACKheev 3915 -> The quality of the POD-computed basis will be the same */ 3916 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3917 /* Store upper triangular part of correlation matrix */ 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3920 for (j=0;j<temp_constraints;j++) { 3921 for (k=0;k<j+1;k++) { 3922 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3923 } 3924 } 3925 /* compute eigenvalues and eigenvectors of correlation matrix */ 3926 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3927 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3928 #if !defined(PETSC_USE_COMPLEX) 3929 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3930 #else 3931 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3932 #endif 3933 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3935 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3936 j = 0; 3937 while (j < temp_constraints && singular_vals[j] < tol) j++; 3938 total_counts = total_counts-j; 3939 valid_constraints = temp_constraints-j; 3940 /* scale and copy POD basis into used quadrature memory */ 3941 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3947 if (j<temp_constraints) { 3948 PetscInt ii; 3949 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3950 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3951 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3953 for (k=0;k<temp_constraints-j;k++) { 3954 for (ii=0;ii<size_of_constraint;ii++) { 3955 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3956 } 3957 } 3958 } 3959 #else /* on missing GESVD */ 3960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3961 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3964 #if !defined(PETSC_USE_COMPLEX) 3965 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3966 #else 3967 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3968 #endif 3969 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3971 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3972 k = temp_constraints; 3973 if (k > size_of_constraint) k = size_of_constraint; 3974 j = 0; 3975 while (j < k && singular_vals[k-j-1] < tol) j++; 3976 valid_constraints = k-j; 3977 total_counts = total_counts-temp_constraints+valid_constraints; 3978 #endif /* on missing GESVD */ 3979 } 3980 } 3981 /* update pointers information */ 3982 if (valid_constraints) { 3983 constraints_n[total_counts_cc] = valid_constraints; 3984 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3985 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3986 /* set change_of_basis flag */ 3987 if (boolforchange) { 3988 PetscBTSet(change_basis,total_counts_cc); 3989 } 3990 total_counts_cc++; 3991 } 3992 } 3993 /* free workspace */ 3994 if (!skip_lapack) { 3995 ierr = PetscFree(work);CHKERRQ(ierr); 3996 #if defined(PETSC_USE_COMPLEX) 3997 ierr = PetscFree(rwork);CHKERRQ(ierr); 3998 #endif 3999 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4000 #if defined(PETSC_MISSING_LAPACK_GESVD) 4001 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4002 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4003 #endif 4004 } 4005 for (k=0;k<nnsp_size;k++) { 4006 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4007 } 4008 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4009 /* free index sets of faces, edges and vertices */ 4010 for (i=0;i<n_ISForFaces;i++) { 4011 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4012 } 4013 if (n_ISForFaces) { 4014 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4015 } 4016 for (i=0;i<n_ISForEdges;i++) { 4017 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4018 } 4019 if (n_ISForEdges) { 4020 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4021 } 4022 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4023 } else { 4024 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4025 4026 total_counts = 0; 4027 n_vertices = 0; 4028 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4029 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4030 } 4031 max_constraints = 0; 4032 total_counts_cc = 0; 4033 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4034 total_counts += pcbddc->adaptive_constraints_n[i]; 4035 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4036 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4037 } 4038 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4039 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4040 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4041 constraints_data = pcbddc->adaptive_constraints_data; 4042 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4043 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4044 total_counts_cc = 0; 4045 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4046 if (pcbddc->adaptive_constraints_n[i]) { 4047 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4048 } 4049 } 4050 #if 0 4051 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4052 for (i=0;i<total_counts_cc;i++) { 4053 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4054 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4055 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4056 printf(" %d",constraints_idxs[j]); 4057 } 4058 printf("\n"); 4059 printf("number of cc: %d\n",constraints_n[i]); 4060 } 4061 for (i=0;i<n_vertices;i++) { 4062 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4063 } 4064 for (i=0;i<sub_schurs->n_subs;i++) { 4065 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4066 } 4067 #endif 4068 4069 max_size_of_constraint = 0; 4070 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4071 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4072 /* Change of basis */ 4073 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4074 if (pcbddc->use_change_of_basis) { 4075 for (i=0;i<sub_schurs->n_subs;i++) { 4076 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4077 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4078 } 4079 } 4080 } 4081 } 4082 pcbddc->local_primal_size = total_counts; 4083 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4084 4085 /* map constraints_idxs in boundary numbering */ 4086 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4087 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4088 4089 /* Create constraint matrix */ 4090 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4091 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4092 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4093 4094 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4095 /* determine if a QR strategy is needed for change of basis */ 4096 qr_needed = PETSC_FALSE; 4097 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4098 total_primal_vertices=0; 4099 pcbddc->local_primal_size_cc = 0; 4100 for (i=0;i<total_counts_cc;i++) { 4101 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4102 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4103 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4104 pcbddc->local_primal_size_cc += 1; 4105 } else if (PetscBTLookup(change_basis,i)) { 4106 for (k=0;k<constraints_n[i];k++) { 4107 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4108 } 4109 pcbddc->local_primal_size_cc += constraints_n[i]; 4110 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4111 PetscBTSet(qr_needed_idx,i); 4112 qr_needed = PETSC_TRUE; 4113 } 4114 } else { 4115 pcbddc->local_primal_size_cc += 1; 4116 } 4117 } 4118 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4119 pcbddc->n_vertices = total_primal_vertices; 4120 /* permute indices in order to have a sorted set of vertices */ 4121 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4122 4123 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4124 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4125 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4126 4127 /* nonzero structure of constraint matrix */ 4128 /* and get reference dof for local constraints */ 4129 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4130 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4131 4132 j = total_primal_vertices; 4133 total_counts = total_primal_vertices; 4134 cum = total_primal_vertices; 4135 for (i=n_vertices;i<total_counts_cc;i++) { 4136 if (!PetscBTLookup(change_basis,i)) { 4137 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4138 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4139 cum++; 4140 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4141 for (k=0;k<constraints_n[i];k++) { 4142 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4143 nnz[j+k] = size_of_constraint; 4144 } 4145 j += constraints_n[i]; 4146 } 4147 } 4148 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4149 ierr = PetscFree(nnz);CHKERRQ(ierr); 4150 4151 /* set values in constraint matrix */ 4152 for (i=0;i<total_primal_vertices;i++) { 4153 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4154 } 4155 total_counts = total_primal_vertices; 4156 for (i=n_vertices;i<total_counts_cc;i++) { 4157 if (!PetscBTLookup(change_basis,i)) { 4158 PetscInt *cols; 4159 4160 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4161 cols = constraints_idxs+constraints_idxs_ptr[i]; 4162 for (k=0;k<constraints_n[i];k++) { 4163 PetscInt row = total_counts+k; 4164 PetscScalar *vals; 4165 4166 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4167 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4168 } 4169 total_counts += constraints_n[i]; 4170 } 4171 } 4172 /* assembling */ 4173 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4174 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4175 4176 /* 4177 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4178 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4179 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4180 */ 4181 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4182 if (pcbddc->use_change_of_basis) { 4183 /* dual and primal dofs on a single cc */ 4184 PetscInt dual_dofs,primal_dofs; 4185 /* working stuff for GEQRF */ 4186 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4187 PetscBLASInt lqr_work; 4188 /* working stuff for UNGQR */ 4189 PetscScalar *gqr_work,lgqr_work_t; 4190 PetscBLASInt lgqr_work; 4191 /* working stuff for TRTRS */ 4192 PetscScalar *trs_rhs; 4193 PetscBLASInt Blas_NRHS; 4194 /* pointers for values insertion into change of basis matrix */ 4195 PetscInt *start_rows,*start_cols; 4196 PetscScalar *start_vals; 4197 /* working stuff for values insertion */ 4198 PetscBT is_primal; 4199 PetscInt *aux_primal_numbering_B; 4200 /* matrix sizes */ 4201 PetscInt global_size,local_size; 4202 /* temporary change of basis */ 4203 Mat localChangeOfBasisMatrix; 4204 /* extra space for debugging */ 4205 PetscScalar *dbg_work; 4206 4207 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4208 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4209 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4210 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4211 /* nonzeros for local mat */ 4212 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4213 if (!pcbddc->benign_change || pcbddc->fake_change) { 4214 for (i=0;i<pcis->n;i++) nnz[i]=1; 4215 } else { 4216 const PetscInt *ii; 4217 PetscInt n; 4218 PetscBool flg_row; 4219 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4220 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4221 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4222 } 4223 for (i=n_vertices;i<total_counts_cc;i++) { 4224 if (PetscBTLookup(change_basis,i)) { 4225 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4226 if (PetscBTLookup(qr_needed_idx,i)) { 4227 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4228 } else { 4229 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4230 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4231 } 4232 } 4233 } 4234 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4235 ierr = PetscFree(nnz);CHKERRQ(ierr); 4236 /* Set interior change in the matrix */ 4237 if (!pcbddc->benign_change || pcbddc->fake_change) { 4238 for (i=0;i<pcis->n;i++) { 4239 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4240 } 4241 } else { 4242 const PetscInt *ii,*jj; 4243 PetscScalar *aa; 4244 PetscInt n; 4245 PetscBool flg_row; 4246 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4247 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4248 for (i=0;i<n;i++) { 4249 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4250 } 4251 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4252 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4253 } 4254 4255 if (pcbddc->dbg_flag) { 4256 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4258 } 4259 4260 4261 /* Now we loop on the constraints which need a change of basis */ 4262 /* 4263 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4264 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4265 4266 Basic blocks of change of basis matrix T computed by 4267 4268 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4269 4270 | 1 0 ... 0 s_1/S | 4271 | 0 1 ... 0 s_2/S | 4272 | ... | 4273 | 0 ... 1 s_{n-1}/S | 4274 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4275 4276 with S = \sum_{i=1}^n s_i^2 4277 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4278 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4279 4280 - QR decomposition of constraints otherwise 4281 */ 4282 if (qr_needed) { 4283 /* space to store Q */ 4284 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4285 /* first we issue queries for optimal work */ 4286 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4288 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4289 lqr_work = -1; 4290 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4291 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4292 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4293 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4294 lgqr_work = -1; 4295 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4296 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4299 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4300 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4301 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4302 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4303 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4304 /* array to store scaling factors for reflectors */ 4305 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4306 /* array to store rhs and solution of triangular solver */ 4307 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4308 /* allocating workspace for check */ 4309 if (pcbddc->dbg_flag) { 4310 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4311 } 4312 } 4313 /* array to store whether a node is primal or not */ 4314 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4315 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4316 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4317 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4318 for (i=0;i<total_primal_vertices;i++) { 4319 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4320 } 4321 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4322 4323 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4324 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4325 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4326 if (PetscBTLookup(change_basis,total_counts)) { 4327 /* get constraint info */ 4328 primal_dofs = constraints_n[total_counts]; 4329 dual_dofs = size_of_constraint-primal_dofs; 4330 4331 if (pcbddc->dbg_flag) { 4332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4333 } 4334 4335 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4336 4337 /* copy quadrature constraints for change of basis check */ 4338 if (pcbddc->dbg_flag) { 4339 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4340 } 4341 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4342 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4343 4344 /* compute QR decomposition of constraints */ 4345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4348 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4349 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4350 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4352 4353 /* explictly compute R^-T */ 4354 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4355 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4356 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4357 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4358 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4359 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4360 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4361 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4362 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4363 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4364 4365 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4366 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4367 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4368 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4369 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4370 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4371 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4372 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4373 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4374 4375 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4376 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4377 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4379 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4380 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4383 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4384 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4385 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4386 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4387 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4388 4389 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4390 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4391 /* insert cols for primal dofs */ 4392 for (j=0;j<primal_dofs;j++) { 4393 start_vals = &qr_basis[j*size_of_constraint]; 4394 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4395 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4396 } 4397 /* insert cols for dual dofs */ 4398 for (j=0,k=0;j<dual_dofs;k++) { 4399 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4400 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4401 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4402 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4403 j++; 4404 } 4405 } 4406 4407 /* check change of basis */ 4408 if (pcbddc->dbg_flag) { 4409 PetscInt ii,jj; 4410 PetscBool valid_qr=PETSC_TRUE; 4411 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4414 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4418 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4420 for (jj=0;jj<size_of_constraint;jj++) { 4421 for (ii=0;ii<primal_dofs;ii++) { 4422 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4423 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4424 } 4425 } 4426 if (!valid_qr) { 4427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4428 for (jj=0;jj<size_of_constraint;jj++) { 4429 for (ii=0;ii<primal_dofs;ii++) { 4430 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4431 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4432 } 4433 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4434 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4435 } 4436 } 4437 } 4438 } else { 4439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4440 } 4441 } 4442 } else { /* simple transformation block */ 4443 PetscInt row,col; 4444 PetscScalar val,norm; 4445 4446 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4447 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4448 for (j=0;j<size_of_constraint;j++) { 4449 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4450 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4451 if (!PetscBTLookup(is_primal,row_B)) { 4452 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4453 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4454 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4455 } else { 4456 for (k=0;k<size_of_constraint;k++) { 4457 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4458 if (row != col) { 4459 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4460 } else { 4461 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4462 } 4463 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4464 } 4465 } 4466 } 4467 if (pcbddc->dbg_flag) { 4468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4469 } 4470 } 4471 } else { 4472 if (pcbddc->dbg_flag) { 4473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4474 } 4475 } 4476 } 4477 4478 /* free workspace */ 4479 if (qr_needed) { 4480 if (pcbddc->dbg_flag) { 4481 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4482 } 4483 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4484 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4485 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4486 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4487 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4488 } 4489 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4490 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4491 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4492 4493 /* assembling of global change of variable */ 4494 if (!pcbddc->fake_change) { 4495 Mat tmat; 4496 PetscInt bs; 4497 4498 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4499 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4500 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4501 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4502 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4503 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4504 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4505 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4506 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4507 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4508 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4509 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4511 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4512 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4513 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4514 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4515 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4516 4517 /* check */ 4518 if (pcbddc->dbg_flag) { 4519 PetscReal error; 4520 Vec x,x_change; 4521 4522 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4523 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4524 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4525 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4526 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4527 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4529 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4530 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4531 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4532 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4533 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4534 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4536 ierr = VecDestroy(&x);CHKERRQ(ierr); 4537 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4538 } 4539 /* adapt sub_schurs computed (if any) */ 4540 if (pcbddc->use_deluxe_scaling) { 4541 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4542 4543 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4544 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4545 } 4546 if (sub_schurs && sub_schurs->S_Ej_all) { 4547 Mat S_new,tmat; 4548 IS is_all_N,is_V_Sall = NULL; 4549 4550 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4551 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4552 if (pcbddc->deluxe_zerorows) { 4553 ISLocalToGlobalMapping NtoSall; 4554 IS is_V; 4555 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4556 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4557 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4558 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4559 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4560 } 4561 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4562 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4563 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4564 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4565 if (pcbddc->deluxe_zerorows) { 4566 const PetscScalar *array; 4567 const PetscInt *idxs_V,*idxs_all; 4568 PetscInt i,n_V; 4569 4570 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4571 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4572 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4573 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4574 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4575 for (i=0;i<n_V;i++) { 4576 PetscScalar val; 4577 PetscInt idx; 4578 4579 idx = idxs_V[i]; 4580 val = array[idxs_all[idxs_V[i]]]; 4581 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4582 } 4583 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4584 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4586 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4587 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4588 } 4589 sub_schurs->S_Ej_all = S_new; 4590 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4591 if (sub_schurs->sum_S_Ej_all) { 4592 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4593 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4594 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4595 if (pcbddc->deluxe_zerorows) { 4596 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4597 } 4598 sub_schurs->sum_S_Ej_all = S_new; 4599 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4600 } 4601 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4602 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4603 } 4604 /* destroy any change of basis context in sub_schurs */ 4605 if (sub_schurs && sub_schurs->change) { 4606 PetscInt i; 4607 4608 for (i=0;i<sub_schurs->n_subs;i++) { 4609 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4610 } 4611 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4612 } 4613 } 4614 if (pcbddc->switch_static) { /* need to save the local change */ 4615 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4616 } else { 4617 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4618 } 4619 /* determine if any process has changed the pressures locally */ 4620 pcbddc->change_interior = pcbddc->benign_have_null; 4621 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4622 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4623 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4624 pcbddc->use_qr_single = qr_needed; 4625 } 4626 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4627 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4628 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4629 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4630 } else { 4631 Mat benign_global = NULL; 4632 if (pcbddc->benign_have_null) { 4633 Mat tmat; 4634 4635 pcbddc->change_interior = PETSC_TRUE; 4636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4637 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4640 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4641 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4642 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4643 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4644 if (pcbddc->benign_change) { 4645 Mat M; 4646 4647 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4648 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4649 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4650 ierr = MatDestroy(&M);CHKERRQ(ierr); 4651 } else { 4652 Mat eye; 4653 PetscScalar *array; 4654 4655 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4656 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4657 for (i=0;i<pcis->n;i++) { 4658 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4659 } 4660 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4661 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4662 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4663 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4664 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4665 } 4666 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4667 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4668 } 4669 if (pcbddc->user_ChangeOfBasisMatrix) { 4670 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4671 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4672 } else if (pcbddc->benign_have_null) { 4673 pcbddc->ChangeOfBasisMatrix = benign_global; 4674 } 4675 } 4676 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4677 IS is_global; 4678 const PetscInt *gidxs; 4679 4680 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4681 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4682 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4683 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4684 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4685 } 4686 } 4687 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4688 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4689 } 4690 4691 if (!pcbddc->fake_change) { 4692 /* add pressure dofs to set of primal nodes for numbering purposes */ 4693 for (i=0;i<pcbddc->benign_n;i++) { 4694 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4695 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4696 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4697 pcbddc->local_primal_size_cc++; 4698 pcbddc->local_primal_size++; 4699 } 4700 4701 /* check if a new primal space has been introduced (also take into account benign trick) */ 4702 pcbddc->new_primal_space_local = PETSC_TRUE; 4703 if (olocal_primal_size == pcbddc->local_primal_size) { 4704 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4705 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4706 if (!pcbddc->new_primal_space_local) { 4707 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4708 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4709 } 4710 } 4711 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4712 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4713 } 4714 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4715 4716 /* flush dbg viewer */ 4717 if (pcbddc->dbg_flag) { 4718 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4719 } 4720 4721 /* free workspace */ 4722 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4723 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4724 if (!pcbddc->adaptive_selection) { 4725 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4726 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4727 } else { 4728 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4729 pcbddc->adaptive_constraints_idxs_ptr, 4730 pcbddc->adaptive_constraints_data_ptr, 4731 pcbddc->adaptive_constraints_idxs, 4732 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4733 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4734 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4735 } 4736 PetscFunctionReturn(0); 4737 } 4738 4739 #undef __FUNCT__ 4740 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4741 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4742 { 4743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4744 PC_IS *pcis = (PC_IS*)pc->data; 4745 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4746 PetscInt ierr,i,N; 4747 4748 PetscFunctionBegin; 4749 /* Reset previously computed graph */ 4750 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4751 /* Init local Graph struct */ 4752 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4753 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4754 4755 /* Check validity of the csr graph passed in by the user */ 4756 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4757 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4758 } 4759 4760 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4761 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4762 PetscInt *xadj,*adjncy; 4763 PetscInt nvtxs; 4764 PetscBool flg_row=PETSC_FALSE; 4765 4766 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4767 if (flg_row) { 4768 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4769 pcbddc->computed_rowadj = PETSC_TRUE; 4770 } 4771 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4772 } 4773 if (pcbddc->dbg_flag) { 4774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4775 } 4776 4777 /* Setup of Graph */ 4778 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4779 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4780 4781 /* attach info on disconnected subdomains if present */ 4782 if (pcbddc->n_local_subs) { 4783 PetscInt *local_subs; 4784 4785 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4786 for (i=0;i<pcbddc->n_local_subs;i++) { 4787 const PetscInt *idxs; 4788 PetscInt nl,j; 4789 4790 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4791 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4792 for (j=0;j<nl;j++) { 4793 local_subs[idxs[j]] = i; 4794 } 4795 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4796 } 4797 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4798 pcbddc->mat_graph->local_subs = local_subs; 4799 } 4800 4801 /* Graph's connected components analysis */ 4802 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4803 PetscFunctionReturn(0); 4804 } 4805 4806 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4807 #undef __FUNCT__ 4808 #define __FUNCT__ "PCBDDCSubsetNumbering" 4809 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4810 { 4811 PetscSF sf; 4812 PetscLayout map; 4813 const PetscInt *idxs; 4814 PetscInt *leaf_data,*root_data,*gidxs; 4815 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4816 PetscInt n_n,nlocals,start,first_index; 4817 PetscMPIInt commsize; 4818 PetscBool first_found; 4819 PetscErrorCode ierr; 4820 4821 PetscFunctionBegin; 4822 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4823 if (subset_mult) { 4824 PetscCheckSameComm(subset,1,subset_mult,2); 4825 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4826 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4827 } 4828 /* create workspace layout for computing global indices of subset */ 4829 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4830 lbounds[0] = lbounds[1] = 0; 4831 for (i=0;i<n;i++) { 4832 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4833 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4834 } 4835 lbounds[0] = -lbounds[0]; 4836 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4837 gbounds[0] = -gbounds[0]; 4838 N = gbounds[1] - gbounds[0] + 1; 4839 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4840 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4841 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4842 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4843 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4844 4845 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4846 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4847 if (subset_mult) { 4848 const PetscInt* idxs_mult; 4849 4850 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4851 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4852 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4853 } else { 4854 for (i=0;i<n;i++) leaf_data[i] = 1; 4855 } 4856 /* local size of new subset */ 4857 n_n = 0; 4858 for (i=0;i<n;i++) n_n += leaf_data[i]; 4859 4860 /* global indexes in layout */ 4861 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4862 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4863 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4864 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4865 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4866 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4867 4868 /* reduce from leaves to roots */ 4869 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4870 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4871 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4872 4873 /* count indexes in local part of layout */ 4874 nlocals = 0; 4875 first_index = -1; 4876 first_found = PETSC_FALSE; 4877 for (i=0;i<Nl;i++) { 4878 if (!first_found && root_data[i]) { 4879 first_found = PETSC_TRUE; 4880 first_index = i; 4881 } 4882 nlocals += root_data[i]; 4883 } 4884 4885 /* cumulative of number of indexes and size of subset without holes */ 4886 #if defined(PETSC_HAVE_MPI_EXSCAN) 4887 start = 0; 4888 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4889 #else 4890 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4891 start = start-nlocals; 4892 #endif 4893 4894 if (N_n) { /* compute total size of new subset if requested */ 4895 *N_n = start + nlocals; 4896 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4897 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4898 } 4899 4900 /* adapt root data with cumulative */ 4901 if (first_found) { 4902 PetscInt old_index; 4903 4904 root_data[first_index] += start; 4905 old_index = first_index; 4906 for (i=first_index+1;i<Nl;i++) { 4907 if (root_data[i]) { 4908 root_data[i] += root_data[old_index]; 4909 old_index = i; 4910 } 4911 } 4912 } 4913 4914 /* from roots to leaves */ 4915 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4916 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4917 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4918 4919 /* create new IS with global indexes without holes */ 4920 if (subset_mult) { 4921 const PetscInt* idxs_mult; 4922 PetscInt cum; 4923 4924 cum = 0; 4925 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4926 for (i=0;i<n;i++) { 4927 PetscInt j; 4928 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4929 } 4930 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4931 } else { 4932 for (i=0;i<n;i++) { 4933 gidxs[i] = leaf_data[i]-1; 4934 } 4935 } 4936 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4937 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 /* this implements stabilized Gram-Schmidt */ 4942 #undef __FUNCT__ 4943 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4944 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4945 { 4946 PetscInt i,j; 4947 PetscScalar *alphas; 4948 PetscErrorCode ierr; 4949 4950 PetscFunctionBegin; 4951 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4952 for (i=0;i<n;i++) { 4953 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4954 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4955 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4956 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4957 } 4958 ierr = PetscFree(alphas);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatISGetSubassemblingPattern" 4964 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4965 { 4966 Mat A; 4967 PetscInt n_neighs,*neighs,*n_shared,**shared; 4968 PetscMPIInt size,rank,color; 4969 PetscInt *xadj,*adjncy; 4970 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4971 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4972 PetscInt void_procs,*procs_candidates = NULL; 4973 PetscInt xadj_count, *count; 4974 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4975 PetscSubcomm psubcomm; 4976 MPI_Comm subcomm; 4977 PetscErrorCode ierr; 4978 4979 PetscFunctionBegin; 4980 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4981 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4982 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4983 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4984 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4985 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4986 4987 if (have_void) *have_void = PETSC_FALSE; 4988 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4989 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4990 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4991 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4992 im_active = !!(n); 4993 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4994 void_procs = size - active_procs; 4995 /* get ranks of of non-active processes in mat communicator */ 4996 if (void_procs) { 4997 PetscInt ncand; 4998 4999 if (have_void) *have_void = PETSC_TRUE; 5000 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5001 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5002 for (i=0,ncand=0;i<size;i++) { 5003 if (!procs_candidates[i]) { 5004 procs_candidates[ncand++] = i; 5005 } 5006 } 5007 /* force n_subdomains to be not greater that the number of non-active processes */ 5008 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5009 } 5010 5011 /* number of subdomains requested greater than active processes -> just shift the matrix */ 5012 if (active_procs < *n_subdomains) { 5013 PetscInt issize,isidx; 5014 if (im_active) { 5015 issize = 1; 5016 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5017 isidx = procs_candidates[rank]; 5018 } else { 5019 isidx = rank; 5020 } 5021 } else { 5022 issize = 0; 5023 isidx = -1; 5024 } 5025 *n_subdomains = active_procs; 5026 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5027 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5028 PetscFunctionReturn(0); 5029 } 5030 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5031 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5032 threshold = PetscMax(threshold,2); 5033 5034 /* Get info on mapping */ 5035 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5036 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5037 5038 /* build local CSR graph of subdomains' connectivity */ 5039 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5040 xadj[0] = 0; 5041 xadj[1] = PetscMax(n_neighs-1,0); 5042 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5043 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5044 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5045 for (i=1;i<n_neighs;i++) 5046 for (j=0;j<n_shared[i];j++) 5047 count[shared[i][j]] += 1; 5048 5049 xadj_count = 0; 5050 for (i=1;i<n_neighs;i++) { 5051 for (j=0;j<n_shared[i];j++) { 5052 if (count[shared[i][j]] < threshold) { 5053 adjncy[xadj_count] = neighs[i]; 5054 adjncy_wgt[xadj_count] = n_shared[i]; 5055 xadj_count++; 5056 break; 5057 } 5058 } 5059 } 5060 xadj[1] = xadj_count; 5061 ierr = PetscFree(count);CHKERRQ(ierr); 5062 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5063 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5064 5065 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5066 5067 /* Restrict work on active processes only */ 5068 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5069 if (void_procs) { 5070 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5071 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5072 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5073 subcomm = PetscSubcommChild(psubcomm); 5074 } else { 5075 psubcomm = NULL; 5076 subcomm = PetscObjectComm((PetscObject)mat); 5077 } 5078 5079 v_wgt = NULL; 5080 if (!color) { 5081 ierr = PetscFree(xadj);CHKERRQ(ierr); 5082 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5083 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5084 } else { 5085 Mat subdomain_adj; 5086 IS new_ranks,new_ranks_contig; 5087 MatPartitioning partitioner; 5088 PetscInt rstart=0,rend=0; 5089 PetscInt *is_indices,*oldranks; 5090 PetscMPIInt size; 5091 PetscBool aggregate; 5092 5093 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5094 if (void_procs) { 5095 PetscInt prank = rank; 5096 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5097 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5098 for (i=0;i<xadj[1];i++) { 5099 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5100 } 5101 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5102 } else { 5103 oldranks = NULL; 5104 } 5105 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5106 if (aggregate) { /* TODO: all this part could be made more efficient */ 5107 PetscInt lrows,row,ncols,*cols; 5108 PetscMPIInt nrank; 5109 PetscScalar *vals; 5110 5111 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5112 lrows = 0; 5113 if (nrank<redprocs) { 5114 lrows = size/redprocs; 5115 if (nrank<size%redprocs) lrows++; 5116 } 5117 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5118 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5119 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5120 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5121 row = nrank; 5122 ncols = xadj[1]-xadj[0]; 5123 cols = adjncy; 5124 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5125 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5126 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5127 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5128 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5129 ierr = PetscFree(xadj);CHKERRQ(ierr); 5130 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5131 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5132 ierr = PetscFree(vals);CHKERRQ(ierr); 5133 if (use_vwgt) { 5134 Vec v; 5135 const PetscScalar *array; 5136 PetscInt nl; 5137 5138 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5139 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5140 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5141 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5142 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5143 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5144 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5145 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5146 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5147 ierr = VecDestroy(&v);CHKERRQ(ierr); 5148 } 5149 } else { 5150 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5151 if (use_vwgt) { 5152 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5153 v_wgt[0] = local_size; 5154 } 5155 } 5156 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5157 5158 /* Partition */ 5159 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5160 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5161 if (v_wgt) { 5162 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5163 } 5164 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5165 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5166 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5167 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5168 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5169 5170 /* renumber new_ranks to avoid "holes" in new set of processors */ 5171 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5172 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5173 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5174 if (!aggregate) { 5175 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5176 #if defined(PETSC_USE_DEBUG) 5177 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5178 #endif 5179 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5180 } else if (oldranks) { 5181 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5182 } else { 5183 ranks_send_to_idx[0] = is_indices[0]; 5184 } 5185 } else { 5186 PetscInt idxs[1]; 5187 PetscMPIInt tag; 5188 MPI_Request *reqs; 5189 5190 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5191 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5192 for (i=rstart;i<rend;i++) { 5193 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5194 } 5195 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5196 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5197 ierr = PetscFree(reqs);CHKERRQ(ierr); 5198 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5199 #if defined(PETSC_USE_DEBUG) 5200 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5201 #endif 5202 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5203 } else if (oldranks) { 5204 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5205 } else { 5206 ranks_send_to_idx[0] = idxs[0]; 5207 } 5208 } 5209 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5210 /* clean up */ 5211 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5212 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5213 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5214 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5215 } 5216 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5217 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5218 5219 /* assemble parallel IS for sends */ 5220 i = 1; 5221 if (!color) i=0; 5222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5223 PetscFunctionReturn(0); 5224 } 5225 5226 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5227 5228 #undef __FUNCT__ 5229 #define __FUNCT__ "MatISSubassemble" 5230 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5231 { 5232 Mat local_mat; 5233 IS is_sends_internal; 5234 PetscInt rows,cols,new_local_rows; 5235 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5236 PetscBool ismatis,isdense,newisdense,destroy_mat; 5237 ISLocalToGlobalMapping l2gmap; 5238 PetscInt* l2gmap_indices; 5239 const PetscInt* is_indices; 5240 MatType new_local_type; 5241 /* buffers */ 5242 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5243 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5244 PetscInt *recv_buffer_idxs_local; 5245 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5246 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5247 /* MPI */ 5248 MPI_Comm comm,comm_n; 5249 PetscSubcomm subcomm; 5250 PetscMPIInt n_sends,n_recvs,commsize; 5251 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5252 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5253 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5254 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5255 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5256 PetscErrorCode ierr; 5257 5258 PetscFunctionBegin; 5259 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5260 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5261 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5262 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5263 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5264 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5265 PetscValidLogicalCollectiveBool(mat,reuse,6); 5266 PetscValidLogicalCollectiveInt(mat,nis,8); 5267 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5268 if (nvecs) { 5269 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5270 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5271 } 5272 /* further checks */ 5273 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5274 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5275 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5276 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5277 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5278 if (reuse && *mat_n) { 5279 PetscInt mrows,mcols,mnrows,mncols; 5280 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5281 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5282 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5283 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5284 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5285 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5286 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5287 } 5288 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5289 PetscValidLogicalCollectiveInt(mat,bs,0); 5290 5291 /* prepare IS for sending if not provided */ 5292 if (!is_sends) { 5293 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5294 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5295 } else { 5296 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5297 is_sends_internal = is_sends; 5298 } 5299 5300 /* get comm */ 5301 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5302 5303 /* compute number of sends */ 5304 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5305 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5306 5307 /* compute number of receives */ 5308 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5309 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5310 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5311 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5312 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5313 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5314 ierr = PetscFree(iflags);CHKERRQ(ierr); 5315 5316 /* restrict comm if requested */ 5317 subcomm = 0; 5318 destroy_mat = PETSC_FALSE; 5319 if (restrict_comm) { 5320 PetscMPIInt color,subcommsize; 5321 5322 color = 0; 5323 if (restrict_full) { 5324 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5325 } else { 5326 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5327 } 5328 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5329 subcommsize = commsize - subcommsize; 5330 /* check if reuse has been requested */ 5331 if (reuse) { 5332 if (*mat_n) { 5333 PetscMPIInt subcommsize2; 5334 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5335 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5336 comm_n = PetscObjectComm((PetscObject)*mat_n); 5337 } else { 5338 comm_n = PETSC_COMM_SELF; 5339 } 5340 } else { /* MAT_INITIAL_MATRIX */ 5341 PetscMPIInt rank; 5342 5343 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5344 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5345 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5346 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5347 comm_n = PetscSubcommChild(subcomm); 5348 } 5349 /* flag to destroy *mat_n if not significative */ 5350 if (color) destroy_mat = PETSC_TRUE; 5351 } else { 5352 comm_n = comm; 5353 } 5354 5355 /* prepare send/receive buffers */ 5356 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5357 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5358 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5359 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5360 if (nis) { 5361 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5362 } 5363 5364 /* Get data from local matrices */ 5365 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5366 /* TODO: See below some guidelines on how to prepare the local buffers */ 5367 /* 5368 send_buffer_vals should contain the raw values of the local matrix 5369 send_buffer_idxs should contain: 5370 - MatType_PRIVATE type 5371 - PetscInt size_of_l2gmap 5372 - PetscInt global_row_indices[size_of_l2gmap] 5373 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5374 */ 5375 else { 5376 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5377 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5378 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5379 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5380 send_buffer_idxs[1] = i; 5381 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5382 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5383 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5384 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5385 for (i=0;i<n_sends;i++) { 5386 ilengths_vals[is_indices[i]] = len*len; 5387 ilengths_idxs[is_indices[i]] = len+2; 5388 } 5389 } 5390 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5391 /* additional is (if any) */ 5392 if (nis) { 5393 PetscMPIInt psum; 5394 PetscInt j; 5395 for (j=0,psum=0;j<nis;j++) { 5396 PetscInt plen; 5397 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5398 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5399 psum += len+1; /* indices + lenght */ 5400 } 5401 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5402 for (j=0,psum=0;j<nis;j++) { 5403 PetscInt plen; 5404 const PetscInt *is_array_idxs; 5405 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5406 send_buffer_idxs_is[psum] = plen; 5407 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5408 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5409 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5410 psum += plen+1; /* indices + lenght */ 5411 } 5412 for (i=0;i<n_sends;i++) { 5413 ilengths_idxs_is[is_indices[i]] = psum; 5414 } 5415 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5416 } 5417 5418 buf_size_idxs = 0; 5419 buf_size_vals = 0; 5420 buf_size_idxs_is = 0; 5421 buf_size_vecs = 0; 5422 for (i=0;i<n_recvs;i++) { 5423 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5424 buf_size_vals += (PetscInt)olengths_vals[i]; 5425 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5426 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5427 } 5428 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5429 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5430 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5431 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5432 5433 /* get new tags for clean communications */ 5434 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5435 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5436 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5437 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5438 5439 /* allocate for requests */ 5440 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5441 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5442 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5443 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5444 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5445 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5446 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5447 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5448 5449 /* communications */ 5450 ptr_idxs = recv_buffer_idxs; 5451 ptr_vals = recv_buffer_vals; 5452 ptr_idxs_is = recv_buffer_idxs_is; 5453 ptr_vecs = recv_buffer_vecs; 5454 for (i=0;i<n_recvs;i++) { 5455 source_dest = onodes[i]; 5456 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5457 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5458 ptr_idxs += olengths_idxs[i]; 5459 ptr_vals += olengths_vals[i]; 5460 if (nis) { 5461 source_dest = onodes_is[i]; 5462 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5463 ptr_idxs_is += olengths_idxs_is[i]; 5464 } 5465 if (nvecs) { 5466 source_dest = onodes[i]; 5467 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5468 ptr_vecs += olengths_idxs[i]-2; 5469 } 5470 } 5471 for (i=0;i<n_sends;i++) { 5472 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5473 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5474 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5475 if (nis) { 5476 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5477 } 5478 if (nvecs) { 5479 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5480 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5481 } 5482 } 5483 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5484 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5485 5486 /* assemble new l2g map */ 5487 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5488 ptr_idxs = recv_buffer_idxs; 5489 new_local_rows = 0; 5490 for (i=0;i<n_recvs;i++) { 5491 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5492 ptr_idxs += olengths_idxs[i]; 5493 } 5494 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5495 ptr_idxs = recv_buffer_idxs; 5496 new_local_rows = 0; 5497 for (i=0;i<n_recvs;i++) { 5498 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5499 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5500 ptr_idxs += olengths_idxs[i]; 5501 } 5502 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5503 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5504 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5505 5506 /* infer new local matrix type from received local matrices type */ 5507 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5508 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5509 if (n_recvs) { 5510 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5511 ptr_idxs = recv_buffer_idxs; 5512 for (i=0;i<n_recvs;i++) { 5513 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5514 new_local_type_private = MATAIJ_PRIVATE; 5515 break; 5516 } 5517 ptr_idxs += olengths_idxs[i]; 5518 } 5519 switch (new_local_type_private) { 5520 case MATDENSE_PRIVATE: 5521 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5522 new_local_type = MATSEQAIJ; 5523 bs = 1; 5524 } else { /* if I receive only 1 dense matrix */ 5525 new_local_type = MATSEQDENSE; 5526 bs = 1; 5527 } 5528 break; 5529 case MATAIJ_PRIVATE: 5530 new_local_type = MATSEQAIJ; 5531 bs = 1; 5532 break; 5533 case MATBAIJ_PRIVATE: 5534 new_local_type = MATSEQBAIJ; 5535 break; 5536 case MATSBAIJ_PRIVATE: 5537 new_local_type = MATSEQSBAIJ; 5538 break; 5539 default: 5540 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5541 break; 5542 } 5543 } else { /* by default, new_local_type is seqdense */ 5544 new_local_type = MATSEQDENSE; 5545 bs = 1; 5546 } 5547 5548 /* create MATIS object if needed */ 5549 if (!reuse) { 5550 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5551 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5552 } else { 5553 /* it also destroys the local matrices */ 5554 if (*mat_n) { 5555 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5556 } else { /* this is a fake object */ 5557 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5558 } 5559 } 5560 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5561 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5562 5563 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5564 5565 /* Global to local map of received indices */ 5566 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5567 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5568 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5569 5570 /* restore attributes -> type of incoming data and its size */ 5571 buf_size_idxs = 0; 5572 for (i=0;i<n_recvs;i++) { 5573 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5574 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5575 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5576 } 5577 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5578 5579 /* set preallocation */ 5580 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5581 if (!newisdense) { 5582 PetscInt *new_local_nnz=0; 5583 5584 ptr_idxs = recv_buffer_idxs_local; 5585 if (n_recvs) { 5586 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5587 } 5588 for (i=0;i<n_recvs;i++) { 5589 PetscInt j; 5590 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5591 for (j=0;j<*(ptr_idxs+1);j++) { 5592 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5593 } 5594 } else { 5595 /* TODO */ 5596 } 5597 ptr_idxs += olengths_idxs[i]; 5598 } 5599 if (new_local_nnz) { 5600 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5601 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5602 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5603 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5604 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5605 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5606 } else { 5607 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5608 } 5609 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5610 } else { 5611 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5612 } 5613 5614 /* set values */ 5615 ptr_vals = recv_buffer_vals; 5616 ptr_idxs = recv_buffer_idxs_local; 5617 for (i=0;i<n_recvs;i++) { 5618 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5619 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5620 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5621 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5622 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5623 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5624 } else { 5625 /* TODO */ 5626 } 5627 ptr_idxs += olengths_idxs[i]; 5628 ptr_vals += olengths_vals[i]; 5629 } 5630 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5634 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5635 5636 #if 0 5637 if (!restrict_comm) { /* check */ 5638 Vec lvec,rvec; 5639 PetscReal infty_error; 5640 5641 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5642 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5643 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5644 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5645 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5646 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5647 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5648 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5649 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5650 } 5651 #endif 5652 5653 /* assemble new additional is (if any) */ 5654 if (nis) { 5655 PetscInt **temp_idxs,*count_is,j,psum; 5656 5657 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5658 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5659 ptr_idxs = recv_buffer_idxs_is; 5660 psum = 0; 5661 for (i=0;i<n_recvs;i++) { 5662 for (j=0;j<nis;j++) { 5663 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5664 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5665 psum += plen; 5666 ptr_idxs += plen+1; /* shift pointer to received data */ 5667 } 5668 } 5669 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5670 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5671 for (i=1;i<nis;i++) { 5672 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5673 } 5674 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5675 ptr_idxs = recv_buffer_idxs_is; 5676 for (i=0;i<n_recvs;i++) { 5677 for (j=0;j<nis;j++) { 5678 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5679 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5680 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5681 ptr_idxs += plen+1; /* shift pointer to received data */ 5682 } 5683 } 5684 for (i=0;i<nis;i++) { 5685 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5686 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5687 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5688 } 5689 ierr = PetscFree(count_is);CHKERRQ(ierr); 5690 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5691 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5692 } 5693 /* free workspace */ 5694 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5695 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5696 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5697 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5698 if (isdense) { 5699 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5700 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5701 } else { 5702 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5703 } 5704 if (nis) { 5705 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5706 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5707 } 5708 5709 if (nvecs) { 5710 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5711 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5712 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5713 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5714 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5715 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5716 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5717 /* set values */ 5718 ptr_vals = recv_buffer_vecs; 5719 ptr_idxs = recv_buffer_idxs_local; 5720 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5721 for (i=0;i<n_recvs;i++) { 5722 PetscInt j; 5723 for (j=0;j<*(ptr_idxs+1);j++) { 5724 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5725 } 5726 ptr_idxs += olengths_idxs[i]; 5727 ptr_vals += olengths_idxs[i]-2; 5728 } 5729 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5730 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5731 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5732 } 5733 5734 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5735 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5736 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5737 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5738 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5739 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5740 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5741 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5742 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5743 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5744 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5745 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5746 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5747 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5748 ierr = PetscFree(onodes);CHKERRQ(ierr); 5749 if (nis) { 5750 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5751 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5752 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5753 } 5754 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5755 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5756 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5757 for (i=0;i<nis;i++) { 5758 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5759 } 5760 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5761 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5762 } 5763 *mat_n = NULL; 5764 } 5765 PetscFunctionReturn(0); 5766 } 5767 5768 /* temporary hack into ksp private data structure */ 5769 #include <petsc/private/kspimpl.h> 5770 5771 #undef __FUNCT__ 5772 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5773 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5774 { 5775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5776 PC_IS *pcis = (PC_IS*)pc->data; 5777 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5778 Mat coarsedivudotp = NULL; 5779 MatNullSpace CoarseNullSpace = NULL; 5780 ISLocalToGlobalMapping coarse_islg; 5781 IS coarse_is,*isarray; 5782 PetscInt i,im_active=-1,active_procs=-1; 5783 PetscInt nis,nisdofs,nisneu,nisvert; 5784 PC pc_temp; 5785 PCType coarse_pc_type; 5786 KSPType coarse_ksp_type; 5787 PetscBool multilevel_requested,multilevel_allowed; 5788 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5789 Mat t_coarse_mat_is; 5790 PetscInt ncoarse; 5791 PetscBool compute_vecs = PETSC_FALSE; 5792 PetscScalar *array; 5793 MatReuse coarse_mat_reuse; 5794 PetscBool restr, full_restr, have_void; 5795 PetscErrorCode ierr; 5796 5797 PetscFunctionBegin; 5798 /* Assign global numbering to coarse dofs */ 5799 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5800 PetscInt ocoarse_size; 5801 compute_vecs = PETSC_TRUE; 5802 ocoarse_size = pcbddc->coarse_size; 5803 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5804 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5805 /* see if we can avoid some work */ 5806 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5807 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5808 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5809 PC pc; 5810 PetscBool isbddc; 5811 5812 /* temporary workaround since PCBDDC does not have a reset method so far */ 5813 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5814 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5815 if (isbddc) { 5816 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5817 } else { 5818 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5819 } 5820 coarse_reuse = PETSC_FALSE; 5821 } else { /* we can safely reuse already computed coarse matrix */ 5822 coarse_reuse = PETSC_TRUE; 5823 } 5824 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5825 coarse_reuse = PETSC_FALSE; 5826 } 5827 /* reset any subassembling information */ 5828 if (!coarse_reuse || pcbddc->recompute_topography) { 5829 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5830 } 5831 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5832 coarse_reuse = PETSC_TRUE; 5833 } 5834 /* assemble coarse matrix */ 5835 if (coarse_reuse && pcbddc->coarse_ksp) { 5836 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5837 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5838 coarse_mat_reuse = MAT_REUSE_MATRIX; 5839 } else { 5840 coarse_mat = NULL; 5841 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5842 } 5843 5844 /* creates temporary l2gmap and IS for coarse indexes */ 5845 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5846 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5847 5848 /* creates temporary MATIS object for coarse matrix */ 5849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5850 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5851 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5852 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5853 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5854 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5855 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5856 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5857 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5858 5859 /* count "active" (i.e. with positive local size) and "void" processes */ 5860 im_active = !!(pcis->n); 5861 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5862 5863 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5864 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5865 /* full_restr : just use the receivers from the subassembling pattern */ 5866 coarse_mat_is = NULL; 5867 multilevel_allowed = PETSC_FALSE; 5868 multilevel_requested = PETSC_FALSE; 5869 full_restr = PETSC_TRUE; 5870 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5871 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5872 if (multilevel_requested) { 5873 ncoarse = active_procs/pcbddc->coarsening_ratio; 5874 restr = PETSC_FALSE; 5875 full_restr = PETSC_FALSE; 5876 } else { 5877 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5878 restr = PETSC_TRUE; 5879 full_restr = PETSC_TRUE; 5880 } 5881 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5882 ncoarse = PetscMax(1,ncoarse); 5883 if (!pcbddc->coarse_subassembling) { 5884 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5885 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5886 PetscInt psum; 5887 PetscMPIInt size; 5888 if (pcbddc->coarse_ksp) psum = 1; 5889 else psum = 0; 5890 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5891 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5892 if (ncoarse < size) have_void = PETSC_TRUE; 5893 } 5894 /* determine if we can go multilevel */ 5895 if (multilevel_requested) { 5896 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5897 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5898 } 5899 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5900 5901 /* dump subassembling pattern */ 5902 if (pcbddc->dbg_flag && multilevel_allowed) { 5903 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5904 } 5905 5906 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5907 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5908 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5909 const PetscInt *idxs; 5910 ISLocalToGlobalMapping tmap; 5911 5912 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5913 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5914 /* allocate space for temporary storage */ 5915 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5916 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5917 /* allocate for IS array */ 5918 nisdofs = pcbddc->n_ISForDofsLocal; 5919 nisneu = !!pcbddc->NeumannBoundariesLocal; 5920 nisvert = 0; /* nisvert is not used */ 5921 nis = nisdofs + nisneu + nisvert; 5922 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5923 /* dofs splitting */ 5924 for (i=0;i<nisdofs;i++) { 5925 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5926 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5927 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5928 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5929 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5930 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5931 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5932 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5933 } 5934 /* neumann boundaries */ 5935 if (pcbddc->NeumannBoundariesLocal) { 5936 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5937 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5938 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5939 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5940 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5941 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5943 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5944 } 5945 /* free memory */ 5946 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5947 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5948 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5949 } else { 5950 nis = 0; 5951 nisdofs = 0; 5952 nisneu = 0; 5953 nisvert = 0; 5954 isarray = NULL; 5955 } 5956 /* destroy no longer needed map */ 5957 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5958 5959 /* subassemble */ 5960 if (multilevel_allowed) { 5961 Vec vp[1]; 5962 PetscInt nvecs = 0; 5963 PetscBool reuse,reuser; 5964 5965 if (coarse_mat) reuse = PETSC_TRUE; 5966 else reuse = PETSC_FALSE; 5967 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5968 vp[0] = NULL; 5969 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5970 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5971 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5972 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5973 nvecs = 1; 5974 5975 if (pcbddc->divudotp) { 5976 Mat B; 5977 Vec v,p; 5978 IS dummy; 5979 PetscInt np; 5980 5981 ierr = MatGetSize(pcbddc->divudotp,&np,NULL);CHKERRQ(ierr); 5982 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5983 ierr = MatGetSubMatrix(pcbddc->divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5984 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5985 ierr = VecSet(p,1.);CHKERRQ(ierr); 5986 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5987 ierr = VecDestroy(&p);CHKERRQ(ierr); 5988 ierr = MatDestroy(&B);CHKERRQ(ierr); 5989 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5990 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5991 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5992 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5993 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5994 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5995 ierr = VecDestroy(&v);CHKERRQ(ierr); 5996 } 5997 } 5998 if (reuser) { 5999 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6000 } else { 6001 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6002 } 6003 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6004 PetscScalar *arraym,*arrayv; 6005 PetscInt nl; 6006 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6007 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6008 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6009 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6010 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6011 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6012 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6013 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6014 } 6015 } else { 6016 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6017 } 6018 if (coarse_mat_is || coarse_mat) { 6019 PetscMPIInt size; 6020 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6021 if (!multilevel_allowed) { 6022 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6023 } else { 6024 Mat A; 6025 6026 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6027 if (coarse_mat_is) { 6028 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6029 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6030 coarse_mat = coarse_mat_is; 6031 } 6032 /* be sure we don't have MatSeqDENSE as local mat */ 6033 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6034 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6035 } 6036 } 6037 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6038 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6039 6040 /* create local to global scatters for coarse problem */ 6041 if (compute_vecs) { 6042 PetscInt lrows; 6043 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6044 if (coarse_mat) { 6045 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6046 } else { 6047 lrows = 0; 6048 } 6049 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6050 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6051 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6052 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6053 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6054 } 6055 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6056 6057 /* set defaults for coarse KSP and PC */ 6058 if (multilevel_allowed) { 6059 coarse_ksp_type = KSPRICHARDSON; 6060 coarse_pc_type = PCBDDC; 6061 } else { 6062 coarse_ksp_type = KSPPREONLY; 6063 coarse_pc_type = PCREDUNDANT; 6064 } 6065 6066 /* print some info if requested */ 6067 if (pcbddc->dbg_flag) { 6068 if (!multilevel_allowed) { 6069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6070 if (multilevel_requested) { 6071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6072 } else if (pcbddc->max_levels) { 6073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6074 } 6075 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6076 } 6077 } 6078 6079 /* create the coarse KSP object only once with defaults */ 6080 if (coarse_mat) { 6081 PetscViewer dbg_viewer = NULL; 6082 if (pcbddc->dbg_flag) { 6083 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6084 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6085 } 6086 if (!pcbddc->coarse_ksp) { 6087 char prefix[256],str_level[16]; 6088 size_t len; 6089 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6090 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6091 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6092 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6093 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6094 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6095 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6096 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6097 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6098 /* prefix */ 6099 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6100 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6101 if (!pcbddc->current_level) { 6102 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6103 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6104 } else { 6105 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6106 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6107 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6108 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6109 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6110 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6111 } 6112 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6113 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6114 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6115 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6116 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6117 /* allow user customization */ 6118 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6119 } 6120 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6121 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6122 if (nisdofs) { 6123 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6124 for (i=0;i<nisdofs;i++) { 6125 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6126 } 6127 } 6128 if (nisneu) { 6129 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6130 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6131 } 6132 if (nisvert) { 6133 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6134 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6135 } 6136 6137 /* get some info after set from options */ 6138 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6139 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6140 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6141 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6142 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6143 isbddc = PETSC_FALSE; 6144 } 6145 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6146 if (isredundant) { 6147 KSP inner_ksp; 6148 PC inner_pc; 6149 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6150 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6151 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6152 } 6153 6154 /* parameters which miss an API */ 6155 if (isbddc) { 6156 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6157 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6158 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6159 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6160 if (pcbddc_coarse->benign_saddle_point) { 6161 if (coarsedivudotp) { 6162 ierr = MatDestroy(&pcbddc_coarse->divudotp);CHKERRQ(ierr); 6163 pcbddc_coarse->divudotp = coarsedivudotp; 6164 coarsedivudotp = NULL; 6165 } 6166 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 6167 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6168 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6169 } 6170 } 6171 6172 /* propagate symmetry info of coarse matrix */ 6173 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6174 if (pc->pmat->symmetric_set) { 6175 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6176 } 6177 if (pc->pmat->hermitian_set) { 6178 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6179 } 6180 if (pc->pmat->spd_set) { 6181 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6182 } 6183 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6184 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6185 } 6186 /* set operators */ 6187 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6188 if (pcbddc->dbg_flag) { 6189 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6190 } 6191 } 6192 ierr = PetscFree(isarray);CHKERRQ(ierr); 6193 #if 0 6194 { 6195 PetscViewer viewer; 6196 char filename[256]; 6197 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6198 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6199 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6200 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6201 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6202 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6203 } 6204 #endif 6205 6206 if (pcbddc->coarse_ksp) { 6207 Vec crhs,csol; 6208 6209 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6210 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6211 if (!csol) { 6212 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6213 } 6214 if (!crhs) { 6215 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6216 } 6217 } 6218 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6219 6220 /* compute null space for coarse solver if the benign trick has been requested */ 6221 if (pcbddc->benign_null) { 6222 6223 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6224 for (i=0;i<pcbddc->benign_n;i++) { 6225 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6226 } 6227 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6228 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6229 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6230 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6231 if (coarse_mat) { 6232 Vec nullv; 6233 PetscScalar *array,*array2; 6234 PetscInt nl; 6235 6236 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6237 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6238 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6239 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6240 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6241 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6242 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6243 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6244 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6245 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6246 } 6247 } 6248 6249 if (pcbddc->coarse_ksp) { 6250 PetscBool ispreonly; 6251 6252 if (CoarseNullSpace) { 6253 PetscBool isnull; 6254 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6255 if (isnull) { 6256 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6257 } 6258 /* TODO: add local nullspaces (if any) */ 6259 } 6260 /* setup coarse ksp */ 6261 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6262 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6263 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6264 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6265 KSP check_ksp; 6266 KSPType check_ksp_type; 6267 PC check_pc; 6268 Vec check_vec,coarse_vec; 6269 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6270 PetscInt its; 6271 PetscBool compute_eigs; 6272 PetscReal *eigs_r,*eigs_c; 6273 PetscInt neigs; 6274 const char *prefix; 6275 6276 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6277 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6278 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6279 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6280 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6281 /* prevent from setup unneeded object */ 6282 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6283 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6284 if (ispreonly) { 6285 check_ksp_type = KSPPREONLY; 6286 compute_eigs = PETSC_FALSE; 6287 } else { 6288 check_ksp_type = KSPGMRES; 6289 compute_eigs = PETSC_TRUE; 6290 } 6291 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6292 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6293 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6294 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6295 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6296 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6297 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6298 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6299 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6300 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6301 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6302 /* create random vec */ 6303 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6304 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6305 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6306 /* solve coarse problem */ 6307 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6308 /* set eigenvalue estimation if preonly has not been requested */ 6309 if (compute_eigs) { 6310 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6311 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6312 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6313 if (neigs) { 6314 lambda_max = eigs_r[neigs-1]; 6315 lambda_min = eigs_r[0]; 6316 if (pcbddc->use_coarse_estimates) { 6317 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6318 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6319 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6320 } 6321 } 6322 } 6323 } 6324 6325 /* check coarse problem residual error */ 6326 if (pcbddc->dbg_flag) { 6327 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6328 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6329 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6330 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6331 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6332 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6333 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6334 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6335 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6336 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6337 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6338 if (CoarseNullSpace) { 6339 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6340 } 6341 if (compute_eigs) { 6342 PetscReal lambda_max_s,lambda_min_s; 6343 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6344 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6345 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6346 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6347 for (i=0;i<neigs;i++) { 6348 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6349 } 6350 } 6351 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6352 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6353 } 6354 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6355 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6356 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6357 if (compute_eigs) { 6358 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6359 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6360 } 6361 } 6362 } 6363 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6364 /* print additional info */ 6365 if (pcbddc->dbg_flag) { 6366 /* waits until all processes reaches this point */ 6367 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6368 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6370 } 6371 6372 /* free memory */ 6373 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6374 PetscFunctionReturn(0); 6375 } 6376 6377 #undef __FUNCT__ 6378 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6379 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6380 { 6381 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6382 PC_IS* pcis = (PC_IS*)pc->data; 6383 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6384 IS subset,subset_mult,subset_n; 6385 PetscInt local_size,coarse_size=0; 6386 PetscInt *local_primal_indices=NULL; 6387 const PetscInt *t_local_primal_indices; 6388 PetscErrorCode ierr; 6389 6390 PetscFunctionBegin; 6391 /* Compute global number of coarse dofs */ 6392 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6393 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6394 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6395 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6396 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6397 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6398 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6399 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6400 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6401 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6402 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6403 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6404 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6405 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6406 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6407 6408 /* check numbering */ 6409 if (pcbddc->dbg_flag) { 6410 PetscScalar coarsesum,*array,*array2; 6411 PetscInt i; 6412 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6413 6414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6415 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6417 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6418 /* counter */ 6419 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6420 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6421 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6422 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6423 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6424 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6425 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6426 for (i=0;i<pcbddc->local_primal_size;i++) { 6427 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6428 } 6429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6431 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6432 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6433 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6434 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6435 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6436 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6437 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6438 for (i=0;i<pcis->n;i++) { 6439 if (array[i] != 0.0 && array[i] != array2[i]) { 6440 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6441 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6442 set_error = PETSC_TRUE; 6443 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6444 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6445 } 6446 } 6447 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6448 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6450 for (i=0;i<pcis->n;i++) { 6451 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6452 } 6453 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6454 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6455 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6456 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6457 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6459 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6460 PetscInt *gidxs; 6461 6462 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6463 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6464 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6465 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6467 for (i=0;i<pcbddc->local_primal_size;i++) { 6468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6469 } 6470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6471 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6472 } 6473 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6474 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6475 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6476 } 6477 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6478 /* get back data */ 6479 *coarse_size_n = coarse_size; 6480 *local_primal_indices_n = local_primal_indices; 6481 PetscFunctionReturn(0); 6482 } 6483 6484 #undef __FUNCT__ 6485 #define __FUNCT__ "PCBDDCGlobalToLocal" 6486 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6487 { 6488 IS localis_t; 6489 PetscInt i,lsize,*idxs,n; 6490 PetscScalar *vals; 6491 PetscErrorCode ierr; 6492 6493 PetscFunctionBegin; 6494 /* get indices in local ordering exploiting local to global map */ 6495 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6496 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6497 for (i=0;i<lsize;i++) vals[i] = 1.0; 6498 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6499 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6500 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6501 if (idxs) { /* multilevel guard */ 6502 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6503 } 6504 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6505 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6506 ierr = PetscFree(vals);CHKERRQ(ierr); 6507 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6508 /* now compute set in local ordering */ 6509 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6510 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6511 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6512 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6513 for (i=0,lsize=0;i<n;i++) { 6514 if (PetscRealPart(vals[i]) > 0.5) { 6515 lsize++; 6516 } 6517 } 6518 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6519 for (i=0,lsize=0;i<n;i++) { 6520 if (PetscRealPart(vals[i]) > 0.5) { 6521 idxs[lsize++] = i; 6522 } 6523 } 6524 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6525 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6526 *localis = localis_t; 6527 PetscFunctionReturn(0); 6528 } 6529 6530 #undef __FUNCT__ 6531 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6532 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6533 { 6534 PC_IS *pcis=(PC_IS*)pc->data; 6535 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6536 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6537 Mat S_j; 6538 PetscInt *used_xadj,*used_adjncy; 6539 PetscBool free_used_adj; 6540 PetscErrorCode ierr; 6541 6542 PetscFunctionBegin; 6543 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6544 free_used_adj = PETSC_FALSE; 6545 if (pcbddc->sub_schurs_layers == -1) { 6546 used_xadj = NULL; 6547 used_adjncy = NULL; 6548 } else { 6549 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6550 used_xadj = pcbddc->mat_graph->xadj; 6551 used_adjncy = pcbddc->mat_graph->adjncy; 6552 } else if (pcbddc->computed_rowadj) { 6553 used_xadj = pcbddc->mat_graph->xadj; 6554 used_adjncy = pcbddc->mat_graph->adjncy; 6555 } else { 6556 PetscBool flg_row=PETSC_FALSE; 6557 const PetscInt *xadj,*adjncy; 6558 PetscInt nvtxs; 6559 6560 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6561 if (flg_row) { 6562 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6563 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6564 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6565 free_used_adj = PETSC_TRUE; 6566 } else { 6567 pcbddc->sub_schurs_layers = -1; 6568 used_xadj = NULL; 6569 used_adjncy = NULL; 6570 } 6571 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6572 } 6573 } 6574 6575 /* setup sub_schurs data */ 6576 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6577 if (!sub_schurs->schur_explicit) { 6578 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6579 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6580 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6581 } else { 6582 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6583 PetscBool isseqaij,need_change = PETSC_FALSE;; 6584 PetscInt benign_n; 6585 Mat change = NULL; 6586 Vec scaling = NULL; 6587 IS change_primal = NULL; 6588 6589 if (!pcbddc->use_vertices && reuse_solvers) { 6590 PetscInt n_vertices; 6591 6592 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6593 reuse_solvers = (PetscBool)!n_vertices; 6594 } 6595 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6596 if (!isseqaij) { 6597 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6598 if (matis->A == pcbddc->local_mat) { 6599 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6600 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6601 } else { 6602 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6603 } 6604 } 6605 if (!pcbddc->benign_change_explicit) { 6606 benign_n = pcbddc->benign_n; 6607 } else { 6608 benign_n = 0; 6609 } 6610 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6611 We need a global reduction to avoid possible deadlocks. 6612 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6613 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6614 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6615 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6616 need_change = (PetscBool)(!need_change); 6617 } 6618 /* If the user defines additional constraints, we import them here. 6619 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6620 if (need_change) { 6621 PC_IS *pcisf; 6622 PC_BDDC *pcbddcf; 6623 PC pcf; 6624 6625 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6626 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6627 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6628 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6629 /* hacks */ 6630 pcisf = (PC_IS*)pcf->data; 6631 pcisf->is_B_local = pcis->is_B_local; 6632 pcisf->vec1_N = pcis->vec1_N; 6633 pcisf->BtoNmap = pcis->BtoNmap; 6634 pcisf->n = pcis->n; 6635 pcisf->n_B = pcis->n_B; 6636 pcbddcf = (PC_BDDC*)pcf->data; 6637 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6638 pcbddcf->mat_graph = pcbddc->mat_graph; 6639 pcbddcf->use_faces = PETSC_TRUE; 6640 pcbddcf->use_change_of_basis = PETSC_TRUE; 6641 pcbddcf->use_change_on_faces = PETSC_TRUE; 6642 pcbddcf->use_qr_single = PETSC_TRUE; 6643 pcbddcf->fake_change = PETSC_TRUE; 6644 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6645 /* store information on primal vertices and change of basis (in local numbering) */ 6646 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6647 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6648 change = pcbddcf->ConstraintMatrix; 6649 pcbddcf->ConstraintMatrix = NULL; 6650 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6651 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6652 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6653 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6654 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6655 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6656 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6657 pcf->ops->destroy = NULL; 6658 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6659 } 6660 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6661 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6662 ierr = MatDestroy(&change);CHKERRQ(ierr); 6663 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6664 } 6665 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6666 6667 /* free adjacency */ 6668 if (free_used_adj) { 6669 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6670 } 6671 PetscFunctionReturn(0); 6672 } 6673 6674 #undef __FUNCT__ 6675 #define __FUNCT__ "PCBDDCInitSubSchurs" 6676 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6677 { 6678 PC_IS *pcis=(PC_IS*)pc->data; 6679 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6680 PCBDDCGraph graph; 6681 PetscErrorCode ierr; 6682 6683 PetscFunctionBegin; 6684 /* attach interface graph for determining subsets */ 6685 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6686 IS verticesIS,verticescomm; 6687 PetscInt vsize,*idxs; 6688 6689 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6690 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6691 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6692 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6693 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6694 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6695 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6696 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6697 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6698 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6699 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6700 } else { 6701 graph = pcbddc->mat_graph; 6702 } 6703 /* print some info */ 6704 if (pcbddc->dbg_flag) { 6705 IS vertices; 6706 PetscInt nv,nedges,nfaces; 6707 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6708 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6709 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6710 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6711 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6717 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6718 } 6719 6720 /* sub_schurs init */ 6721 if (!pcbddc->sub_schurs) { 6722 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6723 } 6724 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6725 6726 /* free graph struct */ 6727 if (pcbddc->sub_schurs_rebuild) { 6728 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6729 } 6730 PetscFunctionReturn(0); 6731 } 6732 6733 #undef __FUNCT__ 6734 #define __FUNCT__ "PCBDDCCheckOperator" 6735 PetscErrorCode PCBDDCCheckOperator(PC pc) 6736 { 6737 PC_IS *pcis=(PC_IS*)pc->data; 6738 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6739 PetscErrorCode ierr; 6740 6741 PetscFunctionBegin; 6742 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6743 IS zerodiag = NULL; 6744 Mat S_j,B0_B=NULL; 6745 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6746 PetscScalar *p0_check,*array,*array2; 6747 PetscReal norm; 6748 PetscInt i; 6749 6750 /* B0 and B0_B */ 6751 if (zerodiag) { 6752 IS dummy; 6753 6754 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6755 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6756 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6757 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6758 } 6759 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6760 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6761 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6762 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6763 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6764 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6765 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6766 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6767 /* S_j */ 6768 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6769 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6770 6771 /* mimic vector in \widetilde{W}_\Gamma */ 6772 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6773 /* continuous in primal space */ 6774 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6775 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6776 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6777 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6778 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6779 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6780 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6781 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6782 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6783 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6784 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6785 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6786 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6787 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6788 6789 /* assemble rhs for coarse problem */ 6790 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6791 /* local with Schur */ 6792 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6793 if (zerodiag) { 6794 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6795 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6796 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6797 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6798 } 6799 /* sum on primal nodes the local contributions */ 6800 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6801 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6802 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6803 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6804 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6805 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6806 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6807 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6808 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6809 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6810 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6811 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6812 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6813 /* scale primal nodes (BDDC sums contibutions) */ 6814 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6815 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6816 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6817 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6818 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6819 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6820 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6821 /* global: \widetilde{B0}_B w_\Gamma */ 6822 if (zerodiag) { 6823 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6824 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6825 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6826 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6827 } 6828 /* BDDC */ 6829 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6830 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6831 6832 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6833 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6834 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6835 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6836 for (i=0;i<pcbddc->benign_n;i++) { 6837 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6838 } 6839 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6840 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6841 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6842 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6843 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6844 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6845 } 6846 PetscFunctionReturn(0); 6847 } 6848