1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 PetscErrorCode ierr; 13 ISLocalToGlobalMapping rmap; 14 IS *faces,*edges; 15 Vec p,v,quad_vec,*quad_vecs; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 PetscMPIInt rank; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 51 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 52 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 53 for (i=0;i<maxneighs;i++) { 54 PetscInt first,last; 55 56 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 57 /* the near-null space fo BDDC carries information on quadrature weights, 58 and these can be collinear -> so cheat with MatNullSpaceCreate 59 and create a suitable set of basis vectors first */ 60 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 61 if (i>=first && i < last) { 62 PetscScalar *data; 63 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 64 data[i-first] = 1.; 65 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 66 } 67 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 68 } 69 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 70 for (i=0;i<maxneighs;i++) { /* reset vectors */ 71 PetscInt first,last; 72 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 73 if (i>=first && i < last) { 74 PetscScalar *data; 75 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 76 data[i-first] = 0.; 77 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 78 } 79 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 80 } 81 /* compute local quad vec */ 82 ierr = MatCreateVecs(divudotp,&v,&p);CHKERRQ(ierr); 83 ierr = VecSet(p,1.);CHKERRQ(ierr); 84 ierr = MatMultTranspose(divudotp,p,v);CHKERRQ(ierr); 85 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 86 ierr = VecDestroy(&p);CHKERRQ(ierr); 87 /* insert in global quadrature vecs */ 88 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 89 for (i=0;i<nf;i++) { 90 const PetscInt *idxs; 91 PetscInt idx,nn,j; 92 93 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 95 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 96 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 97 idx = -(idx+1); 98 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 99 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 100 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 101 } 102 for (i=0;i<ne;i++) { 103 const PetscInt *idxs; 104 PetscInt idx,nn,j; 105 106 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 107 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 108 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 109 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 110 idx = -(idx+1); 111 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 112 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 113 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 114 } 115 ierr = PetscFree(edges);CHKERRQ(ierr); 116 ierr = PetscFree(faces);CHKERRQ(ierr); 117 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 118 ierr = VecDestroy(&v);CHKERRQ(ierr); 119 ierr = PetscFree(vals);CHKERRQ(ierr); 120 121 /* assemble near null space */ 122 for (i=0;i<maxneighs;i++) { 123 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 124 } 125 for (i=0;i<maxneighs;i++) { 126 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 127 } 128 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 135 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 136 { 137 PetscErrorCode ierr; 138 Vec local,global; 139 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 140 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 141 142 PetscFunctionBegin; 143 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 144 /* need to convert from global to local topology information and remove references to information in global ordering */ 145 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 146 if (pcbddc->user_provided_isfordofs) { 147 if (pcbddc->n_ISForDofs) { 148 PetscInt i; 149 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 150 for (i=0;i<pcbddc->n_ISForDofs;i++) { 151 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 152 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 153 } 154 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 155 pcbddc->n_ISForDofs = 0; 156 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 157 } 158 } else { 159 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 160 PetscInt i, n = matis->A->rmap->n; 161 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 162 if (i > 1) { 163 pcbddc->n_ISForDofsLocal = i; 164 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 165 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 166 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 167 } 168 } 169 } 170 } 171 172 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 174 } 175 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 176 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 177 } 178 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 179 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 180 } 181 ierr = VecDestroy(&global);CHKERRQ(ierr); 182 ierr = VecDestroy(&local);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 188 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 189 { 190 PC_IS *pcis = (PC_IS*)(pc->data); 191 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 192 PetscErrorCode ierr; 193 194 PetscFunctionBegin; 195 if (!pcbddc->benign_have_null) { 196 PetscFunctionReturn(0); 197 } 198 if (pcbddc->ChangeOfBasisMatrix) { 199 Vec swap; 200 201 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 202 swap = pcbddc->work_change; 203 pcbddc->work_change = r; 204 r = swap; 205 } 206 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 207 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 208 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 209 ierr = VecSet(z,0.);CHKERRQ(ierr); 210 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 211 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 212 if (pcbddc->ChangeOfBasisMatrix) { 213 Vec swap; 214 215 swap = r; 216 r = pcbddc->work_change; 217 pcbddc->work_change = swap; 218 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 219 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 220 } 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNCT__ 225 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 226 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 227 { 228 PCBDDCBenignMatMult_ctx ctx; 229 PetscErrorCode ierr; 230 PetscBool apply_right,apply_left,reset_x; 231 232 PetscFunctionBegin; 233 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 234 if (transpose) { 235 apply_right = ctx->apply_left; 236 apply_left = ctx->apply_right; 237 } else { 238 apply_right = ctx->apply_right; 239 apply_left = ctx->apply_left; 240 } 241 reset_x = PETSC_FALSE; 242 if (apply_right) { 243 const PetscScalar *ax; 244 PetscInt nl,i; 245 246 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 247 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 248 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 249 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 250 for (i=0;i<ctx->benign_n;i++) { 251 PetscScalar sum,val; 252 const PetscInt *idxs; 253 PetscInt nz,j; 254 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 255 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 256 sum = 0.; 257 if (ctx->apply_p0) { 258 val = ctx->work[idxs[nz-1]]; 259 for (j=0;j<nz-1;j++) { 260 sum += ctx->work[idxs[j]]; 261 ctx->work[idxs[j]] += val; 262 } 263 } else { 264 for (j=0;j<nz-1;j++) { 265 sum += ctx->work[idxs[j]]; 266 } 267 } 268 ctx->work[idxs[nz-1]] -= sum; 269 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 270 } 271 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 272 reset_x = PETSC_TRUE; 273 } 274 if (transpose) { 275 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 276 } else { 277 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 278 } 279 if (reset_x) { 280 ierr = VecResetArray(x);CHKERRQ(ierr); 281 } 282 if (apply_left) { 283 PetscScalar *ay; 284 PetscInt i; 285 286 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 287 for (i=0;i<ctx->benign_n;i++) { 288 PetscScalar sum,val; 289 const PetscInt *idxs; 290 PetscInt nz,j; 291 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 292 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 293 val = -ay[idxs[nz-1]]; 294 if (ctx->apply_p0) { 295 sum = 0.; 296 for (j=0;j<nz-1;j++) { 297 sum += ay[idxs[j]]; 298 ay[idxs[j]] += val; 299 } 300 ay[idxs[nz-1]] += sum; 301 } else { 302 for (j=0;j<nz-1;j++) { 303 ay[idxs[j]] += val; 304 } 305 ay[idxs[nz-1]] = 0.; 306 } 307 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 308 } 309 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 316 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 317 { 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 327 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 328 { 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignShellMat" 338 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 339 { 340 PC_IS *pcis = (PC_IS*)pc->data; 341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 342 PCBDDCBenignMatMult_ctx ctx; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 if (!restore) { 347 Mat A_IB,A_BI; 348 PetscScalar *work; 349 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 350 351 if (pcbddc->benign_original_mat) { 352 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 353 } 354 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 355 PetscFunctionReturn(0); 356 } 357 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 358 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 359 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 360 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 361 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 362 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 363 ierr = PetscNew(&ctx);CHKERRQ(ierr); 364 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 365 ctx->apply_left = PETSC_TRUE; 366 ctx->apply_right = PETSC_FALSE; 367 ctx->apply_p0 = PETSC_FALSE; 368 ctx->benign_n = pcbddc->benign_n; 369 if (reuse) { 370 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 371 ctx->free = PETSC_FALSE; 372 } else { /* TODO: could be optimized for successive solves */ 373 ISLocalToGlobalMapping N_to_D; 374 PetscInt i; 375 376 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 377 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 378 for (i=0;i<pcbddc->benign_n;i++) { 379 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 380 } 381 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 382 ctx->free = PETSC_TRUE; 383 } 384 ctx->A = pcis->A_IB; 385 ctx->work = work; 386 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 387 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 388 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 389 pcis->A_IB = A_IB; 390 391 /* A_BI as A_IB^T */ 392 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 393 pcbddc->benign_original_mat = pcis->A_BI; 394 pcis->A_BI = A_BI; 395 } else { 396 if (!pcbddc->benign_original_mat) { 397 PetscFunctionReturn(0); 398 } 399 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 401 pcis->A_IB = ctx->A; 402 ctx->A = NULL; 403 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 404 pcis->A_BI = pcbddc->benign_original_mat; 405 pcbddc->benign_original_mat = NULL; 406 if (ctx->free) { 407 PetscInt i; 408 for (i=0;i<ctx->benign_n;i++) { 409 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 410 } 411 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 412 } 413 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 414 ierr = PetscFree(ctx);CHKERRQ(ierr); 415 } 416 PetscFunctionReturn(0); 417 } 418 419 /* used just in bddc debug mode */ 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCBenignProject" 422 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 426 Mat An; 427 PetscErrorCode ierr; 428 429 PetscFunctionBegin; 430 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 431 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 432 if (is1) { 433 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 434 ierr = MatDestroy(&An);CHKERRQ(ierr); 435 } else { 436 *B = An; 437 } 438 PetscFunctionReturn(0); 439 } 440 441 /* TODO: add reuse flag */ 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatSeqAIJCompress" 444 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 445 { 446 Mat Bt; 447 PetscScalar *a,*bdata; 448 const PetscInt *ii,*ij; 449 PetscInt m,n,i,nnz,*bii,*bij; 450 PetscBool flg_row; 451 PetscErrorCode ierr; 452 453 PetscFunctionBegin; 454 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 455 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 456 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 457 nnz = n; 458 for (i=0;i<ii[n];i++) { 459 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 460 } 461 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 462 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 463 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 464 nnz = 0; 465 bii[0] = 0; 466 for (i=0;i<n;i++) { 467 PetscInt j; 468 for (j=ii[i];j<ii[i+1];j++) { 469 PetscScalar entry = a[j]; 470 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 471 bij[nnz] = ij[j]; 472 bdata[nnz] = entry; 473 nnz++; 474 } 475 } 476 bii[i+1] = nnz; 477 } 478 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 479 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 480 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 481 { 482 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 483 b->free_a = PETSC_TRUE; 484 b->free_ij = PETSC_TRUE; 485 } 486 *B = Bt; 487 PetscFunctionReturn(0); 488 } 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatDetectDisconnectedComponents" 492 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 493 { 494 Mat B; 495 IS is_dummy,*cc_n; 496 ISLocalToGlobalMapping l2gmap_dummy; 497 PCBDDCGraph graph; 498 PetscInt i,n; 499 PetscInt *xadj,*adjncy; 500 PetscInt *xadj_filtered,*adjncy_filtered; 501 PetscBool flg_row,isseqaij; 502 PetscErrorCode ierr; 503 504 PetscFunctionBegin; 505 if (!A->rmap->N || !A->cmap->N) { 506 *ncc = 0; 507 *cc = NULL; 508 PetscFunctionReturn(0); 509 } 510 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 511 if (!isseqaij && filter) { 512 PetscBool isseqdense; 513 514 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 515 if (!isseqdense) { 516 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 517 } else { /* TODO: rectangular case and LDA */ 518 PetscScalar *array; 519 PetscReal chop=1.e-6; 520 521 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 522 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 523 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 524 for (i=0;i<n;i++) { 525 PetscInt j; 526 for (j=i+1;j<n;j++) { 527 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 528 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 529 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 530 } 531 } 532 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 533 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 534 } 535 } else { 536 B = A; 537 } 538 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 539 540 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 541 if (filter) { 542 PetscScalar *data; 543 PetscInt j,cum; 544 545 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 546 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 547 cum = 0; 548 for (i=0;i<n;i++) { 549 PetscInt t; 550 551 for (j=xadj[i];j<xadj[i+1];j++) { 552 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 553 continue; 554 } 555 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 556 } 557 t = xadj_filtered[i]; 558 xadj_filtered[i] = cum; 559 cum += t; 560 } 561 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 562 } else { 563 xadj_filtered = NULL; 564 adjncy_filtered = NULL; 565 } 566 567 /* compute local connected components using PCBDDCGraph */ 568 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 569 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 570 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 571 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 572 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 573 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 574 if (xadj_filtered) { 575 graph->xadj = xadj_filtered; 576 graph->adjncy = adjncy_filtered; 577 } else { 578 graph->xadj = xadj; 579 graph->adjncy = adjncy; 580 } 581 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 582 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 583 /* partial clean up */ 584 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 585 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 586 if (A != B) { 587 ierr = MatDestroy(&B);CHKERRQ(ierr); 588 } 589 590 /* get back data */ 591 if (ncc) *ncc = graph->ncc; 592 if (cc) { 593 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 594 for (i=0;i<graph->ncc;i++) { 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 596 } 597 *cc = cc_n; 598 } 599 /* clean up graph */ 600 graph->xadj = 0; 601 graph->adjncy = 0; 602 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "PCBDDCBenignCheck" 608 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 609 { 610 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 611 PC_IS* pcis = (PC_IS*)(pc->data); 612 IS dirIS = NULL; 613 PetscInt i; 614 PetscErrorCode ierr; 615 616 PetscFunctionBegin; 617 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 618 if (zerodiag) { 619 Mat A; 620 Vec vec3_N; 621 PetscScalar *vals; 622 const PetscInt *idxs; 623 PetscInt nz,*count; 624 625 /* p0 */ 626 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 627 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 628 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 629 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 630 for (i=0;i<nz;i++) vals[i] = 1.; 631 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 632 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 633 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 634 /* v_I */ 635 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 636 for (i=0;i<nz;i++) vals[i] = 0.; 637 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 638 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 639 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 640 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 641 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 643 if (dirIS) { 644 PetscInt n; 645 646 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 647 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 648 for (i=0;i<n;i++) vals[i] = 0.; 649 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 651 } 652 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 653 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 654 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 655 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 656 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 657 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 658 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 659 if (PetscAbsScalar(vals[0]) > 1.e-1) { 660 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 661 } 662 ierr = PetscFree(vals);CHKERRQ(ierr); 663 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 664 665 /* there should not be any pressure dofs lying on the interface */ 666 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 667 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 668 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 669 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 670 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 671 for (i=0;i<nz;i++) { 672 if (count[idxs[i]]) { 673 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 674 } 675 } 676 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 677 ierr = PetscFree(count);CHKERRQ(ierr); 678 } 679 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 680 681 /* check PCBDDCBenignGetOrSetP0 */ 682 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 683 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 684 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 685 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 686 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 687 for (i=0;i<pcbddc->benign_n;i++) { 688 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 689 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 690 } 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 697 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 698 { 699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 700 IS pressures,zerodiag,*zerodiag_subs; 701 PetscInt nz,n; 702 PetscInt *interior_dofs,n_interior_dofs; 703 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 709 for (n=0;n<pcbddc->benign_n;n++) { 710 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 711 } 712 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 713 pcbddc->benign_n = 0; 714 /* if a local info on dofs is present, assumes that the last field represents "pressures" 715 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 716 Checks if all the pressure dofs in each subdomain have a zero diagonal 717 If not, a change of basis on pressures is not needed 718 since the local Schur complements are already SPD 719 */ 720 has_null_pressures = PETSC_TRUE; 721 have_null = PETSC_TRUE; 722 if (pcbddc->n_ISForDofsLocal) { 723 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 724 725 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 726 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 727 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 728 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 730 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 731 if (!sorted) { 732 ierr = ISSort(pressures);CHKERRQ(ierr); 733 } 734 } else { 735 pressures = NULL; 736 } 737 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 738 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 739 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 740 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 741 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 742 if (!sorted) { 743 ierr = ISSort(zerodiag);CHKERRQ(ierr); 744 } 745 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 746 if (!nz) { 747 if (n) have_null = PETSC_FALSE; 748 has_null_pressures = PETSC_FALSE; 749 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 750 } 751 recompute_zerodiag = PETSC_FALSE; 752 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 753 zerodiag_subs = NULL; 754 pcbddc->benign_n = 0; 755 n_interior_dofs = 0; 756 interior_dofs = NULL; 757 if (pcbddc->current_level) { /* need to compute interior nodes */ 758 PetscInt n,i,j; 759 PetscInt n_neigh,*neigh,*n_shared,**shared; 760 PetscInt *iwork; 761 762 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 763 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 764 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 765 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 766 for (i=0;i<n_neigh;i++) 767 for (j=0;j<n_shared[i];j++) 768 iwork[shared[i][j]] += 1; 769 for (i=0;i<n;i++) 770 if (!iwork[i]) 771 interior_dofs[n_interior_dofs++] = i; 772 ierr = PetscFree(iwork);CHKERRQ(ierr); 773 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 774 } 775 if (has_null_pressures) { 776 IS *subs; 777 PetscInt nsubs,i,j,nl; 778 const PetscInt *idxs; 779 PetscScalar *array; 780 Vec *work; 781 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 782 783 subs = pcbddc->local_subs; 784 nsubs = pcbddc->n_local_subs; 785 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 786 if (pcbddc->current_level) { 787 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 788 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 789 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 790 /* work[0] = 1_p */ 791 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 792 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 793 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 794 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 795 /* work[0] = 1_v */ 796 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 797 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 798 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 799 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 800 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 801 } 802 if (nsubs > 1) { 803 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 804 for (i=0;i<nsubs;i++) { 805 ISLocalToGlobalMapping l2g; 806 IS t_zerodiag_subs; 807 PetscInt nl; 808 809 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 810 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 811 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 812 if (nl) { 813 PetscBool valid = PETSC_TRUE; 814 815 if (pcbddc->current_level) { 816 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 817 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 818 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 819 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 820 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 821 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 822 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 823 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 824 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 825 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 826 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 827 for (j=0;j<n_interior_dofs;j++) { 828 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 829 valid = PETSC_FALSE; 830 break; 831 } 832 } 833 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 834 } 835 if (valid && pcbddc->NeumannBoundariesLocal) { 836 IS t_bc; 837 PetscInt nzb; 838 839 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 840 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 841 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 842 if (nzb) valid = PETSC_FALSE; 843 } 844 if (valid && pressures) { 845 IS t_pressure_subs; 846 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 847 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 848 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 849 } 850 if (valid) { 851 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 852 pcbddc->benign_n++; 853 } else { 854 recompute_zerodiag = PETSC_TRUE; 855 } 856 } 857 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 858 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 859 } 860 } else { /* there's just one subdomain (or zero if they have not been detected */ 861 PetscBool valid = PETSC_TRUE; 862 863 if (pcbddc->NeumannBoundariesLocal) { 864 PetscInt nzb; 865 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 866 if (nzb) valid = PETSC_FALSE; 867 } 868 if (valid && pressures) { 869 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 870 } 871 if (valid && pcbddc->current_level) { 872 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 873 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 874 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 875 for (j=0;j<n_interior_dofs;j++) { 876 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 877 valid = PETSC_FALSE; 878 break; 879 } 880 } 881 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 882 } 883 if (valid) { 884 pcbddc->benign_n = 1; 885 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 886 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 887 zerodiag_subs[0] = zerodiag; 888 } 889 } 890 if (pcbddc->current_level) { 891 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 892 } 893 } 894 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 895 896 if (!pcbddc->benign_n) { 897 PetscInt n; 898 899 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 900 recompute_zerodiag = PETSC_FALSE; 901 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 902 if (n) { 903 has_null_pressures = PETSC_FALSE; 904 have_null = PETSC_FALSE; 905 } 906 } 907 908 /* final check for null pressures */ 909 if (zerodiag && pressures) { 910 PetscInt nz,np; 911 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 912 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 913 if (nz != np) have_null = PETSC_FALSE; 914 } 915 916 if (recompute_zerodiag) { 917 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 918 if (pcbddc->benign_n == 1) { 919 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 920 zerodiag = zerodiag_subs[0]; 921 } else { 922 PetscInt i,nzn,*new_idxs; 923 924 nzn = 0; 925 for (i=0;i<pcbddc->benign_n;i++) { 926 PetscInt ns; 927 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 928 nzn += ns; 929 } 930 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 931 nzn = 0; 932 for (i=0;i<pcbddc->benign_n;i++) { 933 PetscInt ns,*idxs; 934 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 935 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 936 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 937 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 938 nzn += ns; 939 } 940 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 941 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 942 } 943 have_null = PETSC_FALSE; 944 } 945 946 /* Prepare matrix to compute no-net-flux */ 947 if (pcbddc->benign_compute_nonetflux && !pcbddc->divudotp) { 948 Mat A; 949 IS isused = NULL; 950 PetscInt n; 951 if (pressures) { 952 isused = pressures; 953 } else { 954 isused = zerodiag; 955 } 956 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 957 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 958 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 959 if (n) { 960 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&pcbddc->divudotp);CHKERRQ(ierr); 961 } else { 962 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&pcbddc->divudotp);CHKERRQ(ierr); 963 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 } 966 } 967 968 /* change of basis and p0 dofs */ 969 if (has_null_pressures) { 970 IS zerodiagc; 971 const PetscInt *idxs,*idxsc; 972 PetscInt i,s,*nnz; 973 974 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 975 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 976 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 977 /* local change of basis for pressures */ 978 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 979 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 980 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 981 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 982 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 983 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 984 for (i=0;i<pcbddc->benign_n;i++) { 985 PetscInt nzs,j; 986 987 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 988 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 989 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 990 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 991 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 992 } 993 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 994 ierr = PetscFree(nnz);CHKERRQ(ierr); 995 /* set identity on velocities */ 996 for (i=0;i<n-nz;i++) { 997 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1000 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1001 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1002 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1003 /* set change on pressures */ 1004 for (s=0;s<pcbddc->benign_n;s++) { 1005 PetscScalar *array; 1006 PetscInt nzs; 1007 1008 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1009 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1010 for (i=0;i<nzs-1;i++) { 1011 PetscScalar vals[2]; 1012 PetscInt cols[2]; 1013 1014 cols[0] = idxs[i]; 1015 cols[1] = idxs[nzs-1]; 1016 vals[0] = 1.; 1017 vals[1] = 1.; 1018 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1019 } 1020 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1021 for (i=0;i<nzs-1;i++) array[i] = -1.; 1022 array[nzs-1] = 1.; 1023 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1024 /* store local idxs for p0 */ 1025 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1026 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1027 ierr = PetscFree(array);CHKERRQ(ierr); 1028 } 1029 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 /* project if needed */ 1032 if (pcbddc->benign_change_explicit) { 1033 Mat M; 1034 1035 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1036 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1037 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1038 ierr = MatDestroy(&M);CHKERRQ(ierr); 1039 } 1040 /* store global idxs for p0 */ 1041 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1042 } 1043 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1044 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1045 1046 /* determines if the coarse solver will be singular or not */ 1047 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1048 /* determines if the problem has subdomains with 0 pressure block */ 1049 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1050 *zerodiaglocal = zerodiag; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1056 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1057 { 1058 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1059 PetscScalar *array; 1060 PetscErrorCode ierr; 1061 1062 PetscFunctionBegin; 1063 if (!pcbddc->benign_sf) { 1064 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1065 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1066 } 1067 if (get) { 1068 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1069 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1070 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1071 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1072 } else { 1073 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1074 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1075 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1076 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1077 } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1083 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1084 { 1085 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 /* TODO: add error checking 1090 - avoid nested pop (or push) calls. 1091 - cannot push before pop. 1092 - cannot call this if pcbddc->local_mat is NULL 1093 */ 1094 if (!pcbddc->benign_n) { 1095 PetscFunctionReturn(0); 1096 } 1097 if (pop) { 1098 if (pcbddc->benign_change_explicit) { 1099 IS is_p0; 1100 MatReuse reuse; 1101 1102 /* extract B_0 */ 1103 reuse = MAT_INITIAL_MATRIX; 1104 if (pcbddc->benign_B0) { 1105 reuse = MAT_REUSE_MATRIX; 1106 } 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1108 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1109 /* remove rows and cols from local problem */ 1110 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1111 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1112 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1113 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1114 } else { 1115 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1116 PetscScalar *vals; 1117 PetscInt i,n,*idxs_ins; 1118 1119 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1120 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1121 if (!pcbddc->benign_B0) { 1122 PetscInt *nnz; 1123 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1124 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1125 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1126 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1127 for (i=0;i<pcbddc->benign_n;i++) { 1128 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1129 nnz[i] = n - nnz[i]; 1130 } 1131 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1132 ierr = PetscFree(nnz);CHKERRQ(ierr); 1133 } 1134 1135 for (i=0;i<pcbddc->benign_n;i++) { 1136 PetscScalar *array; 1137 PetscInt *idxs,j,nz,cum; 1138 1139 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1140 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1141 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1142 for (j=0;j<nz;j++) vals[j] = 1.; 1143 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1144 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1145 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1146 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1147 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1148 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1149 cum = 0; 1150 for (j=0;j<n;j++) { 1151 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1152 vals[cum] = array[j]; 1153 idxs_ins[cum] = j; 1154 cum++; 1155 } 1156 } 1157 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1159 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1160 } 1161 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1162 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1164 } 1165 } else { /* push */ 1166 if (pcbddc->benign_change_explicit) { 1167 PetscInt i; 1168 1169 for (i=0;i<pcbddc->benign_n;i++) { 1170 PetscScalar *B0_vals; 1171 PetscInt *B0_cols,B0_ncol; 1172 1173 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1174 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1175 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1176 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1177 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1178 } 1179 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1180 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1181 } else { 1182 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1183 } 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1190 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1191 { 1192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1193 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1194 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1195 PetscBLASInt *B_iwork,*B_ifail; 1196 PetscScalar *work,lwork; 1197 PetscScalar *St,*S,*eigv; 1198 PetscScalar *Sarray,*Starray; 1199 PetscReal *eigs,thresh; 1200 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1201 PetscBool allocated_S_St; 1202 #if defined(PETSC_USE_COMPLEX) 1203 PetscReal *rwork; 1204 #endif 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1209 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1210 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1211 1212 if (pcbddc->dbg_flag) { 1213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1216 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1217 } 1218 1219 if (pcbddc->dbg_flag) { 1220 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1221 } 1222 1223 /* max size of subsets */ 1224 mss = 0; 1225 for (i=0;i<sub_schurs->n_subs;i++) { 1226 PetscInt subset_size; 1227 1228 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1229 mss = PetscMax(mss,subset_size); 1230 } 1231 1232 /* min/max and threshold */ 1233 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1234 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1235 nmax = PetscMax(nmin,nmax); 1236 allocated_S_St = PETSC_FALSE; 1237 if (nmin) { 1238 allocated_S_St = PETSC_TRUE; 1239 } 1240 1241 /* allocate lapack workspace */ 1242 cum = cum2 = 0; 1243 maxneigs = 0; 1244 for (i=0;i<sub_schurs->n_subs;i++) { 1245 PetscInt n,subset_size; 1246 1247 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1248 n = PetscMin(subset_size,nmax); 1249 cum += subset_size; 1250 cum2 += subset_size*n; 1251 maxneigs = PetscMax(maxneigs,n); 1252 } 1253 if (mss) { 1254 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1255 PetscBLASInt B_itype = 1; 1256 PetscBLASInt B_N = mss; 1257 PetscReal zero = 0.0; 1258 PetscReal eps = 0.0; /* dlamch? */ 1259 1260 B_lwork = -1; 1261 S = NULL; 1262 St = NULL; 1263 eigs = NULL; 1264 eigv = NULL; 1265 B_iwork = NULL; 1266 B_ifail = NULL; 1267 #if defined(PETSC_USE_COMPLEX) 1268 rwork = NULL; 1269 #endif 1270 thresh = 1.0; 1271 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1272 #if defined(PETSC_USE_COMPLEX) 1273 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1274 #else 1275 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1276 #endif 1277 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1279 } else { 1280 /* TODO */ 1281 } 1282 } else { 1283 lwork = 0; 1284 } 1285 1286 nv = 0; 1287 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1288 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1289 } 1290 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1291 if (allocated_S_St) { 1292 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1293 } 1294 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1295 #if defined(PETSC_USE_COMPLEX) 1296 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1297 #endif 1298 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1299 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1300 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1301 nv+cum,&pcbddc->adaptive_constraints_idxs, 1302 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1303 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1304 1305 maxneigs = 0; 1306 cum = cumarray = 0; 1307 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1308 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1309 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1310 const PetscInt *idxs; 1311 1312 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1313 for (cum=0;cum<nv;cum++) { 1314 pcbddc->adaptive_constraints_n[cum] = 1; 1315 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1316 pcbddc->adaptive_constraints_data[cum] = 1.0; 1317 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1318 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1319 } 1320 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1321 } 1322 1323 if (mss) { /* multilevel */ 1324 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1325 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1326 } 1327 1328 thresh = pcbddc->adaptive_threshold; 1329 for (i=0;i<sub_schurs->n_subs;i++) { 1330 const PetscInt *idxs; 1331 PetscReal upper,lower; 1332 PetscInt j,subset_size,eigs_start = 0; 1333 PetscBLASInt B_N; 1334 PetscBool same_data = PETSC_FALSE; 1335 1336 if (pcbddc->use_deluxe_scaling) { 1337 upper = PETSC_MAX_REAL; 1338 lower = thresh; 1339 } else { 1340 upper = 1./thresh; 1341 lower = 0.; 1342 } 1343 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1344 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1345 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1346 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1347 if (sub_schurs->is_hermitian) { 1348 PetscInt j,k; 1349 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1350 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1351 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1352 } 1353 for (j=0;j<subset_size;j++) { 1354 for (k=j;k<subset_size;k++) { 1355 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1356 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1357 } 1358 } 1359 } else { 1360 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1361 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1362 } 1363 } else { 1364 S = Sarray + cumarray; 1365 St = Starray + cumarray; 1366 } 1367 /* see if we can save some work */ 1368 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1369 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1370 } 1371 1372 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1373 B_neigs = 0; 1374 } else { 1375 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1376 PetscBLASInt B_itype = 1; 1377 PetscBLASInt B_IL, B_IU; 1378 PetscReal eps = -1.0; /* dlamch? */ 1379 PetscInt nmin_s; 1380 PetscBool compute_range = PETSC_FALSE; 1381 1382 if (pcbddc->dbg_flag) { 1383 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1384 } 1385 1386 compute_range = PETSC_FALSE; 1387 if (thresh > 1.+PETSC_SMALL && !same_data) { 1388 compute_range = PETSC_TRUE; 1389 } 1390 1391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1392 if (compute_range) { 1393 1394 /* ask for eigenvalues larger than thresh */ 1395 #if defined(PETSC_USE_COMPLEX) 1396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1397 #else 1398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1399 #endif 1400 } else if (!same_data) { 1401 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1402 B_IL = 1; 1403 #if defined(PETSC_USE_COMPLEX) 1404 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1405 #else 1406 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1407 #endif 1408 } else { /* same_data is true, so get the adaptive function requested by the user */ 1409 PetscInt k; 1410 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1411 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1412 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1413 nmin = nmax; 1414 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1415 for (k=0;k<nmax;k++) { 1416 eigs[k] = 1./PETSC_SMALL; 1417 eigv[k*(subset_size+1)] = 1.0; 1418 } 1419 } 1420 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1421 if (B_ierr) { 1422 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1423 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1424 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1425 } 1426 1427 if (B_neigs > nmax) { 1428 if (pcbddc->dbg_flag) { 1429 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1430 } 1431 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1432 B_neigs = nmax; 1433 } 1434 1435 nmin_s = PetscMin(nmin,B_N); 1436 if (B_neigs < nmin_s) { 1437 PetscBLASInt B_neigs2; 1438 1439 if (pcbddc->use_deluxe_scaling) { 1440 B_IL = B_N - nmin_s + 1; 1441 B_IU = B_N - B_neigs; 1442 } else { 1443 B_IL = B_neigs + 1; 1444 B_IU = nmin_s; 1445 } 1446 if (pcbddc->dbg_flag) { 1447 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1448 } 1449 if (sub_schurs->is_hermitian) { 1450 PetscInt j,k; 1451 for (j=0;j<subset_size;j++) { 1452 for (k=j;k<subset_size;k++) { 1453 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1454 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1455 } 1456 } 1457 } else { 1458 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1459 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1460 } 1461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1462 #if defined(PETSC_USE_COMPLEX) 1463 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1464 #else 1465 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1466 #endif 1467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1468 B_neigs += B_neigs2; 1469 } 1470 if (B_ierr) { 1471 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1472 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1473 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1474 } 1475 if (pcbddc->dbg_flag) { 1476 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1477 for (j=0;j<B_neigs;j++) { 1478 if (eigs[j] == 0.0) { 1479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1480 } else { 1481 if (pcbddc->use_deluxe_scaling) { 1482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1483 } else { 1484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1485 } 1486 } 1487 } 1488 } 1489 } else { 1490 /* TODO */ 1491 } 1492 } 1493 /* change the basis back to the original one */ 1494 if (sub_schurs->change) { 1495 Mat change,phi,phit; 1496 1497 if (pcbddc->dbg_flag > 1) { 1498 PetscInt ii; 1499 for (ii=0;ii<B_neigs;ii++) { 1500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1501 for (j=0;j<B_N;j++) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1503 } 1504 } 1505 } 1506 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1507 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1508 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1509 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1510 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1511 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1512 } 1513 maxneigs = PetscMax(B_neigs,maxneigs); 1514 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1515 if (B_neigs) { 1516 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1517 1518 if (pcbddc->dbg_flag > 1) { 1519 PetscInt ii; 1520 for (ii=0;ii<B_neigs;ii++) { 1521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1522 for (j=0;j<B_N;j++) { 1523 #if defined(PETSC_USE_COMPLEX) 1524 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1525 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1527 #else 1528 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1529 #endif 1530 } 1531 } 1532 } 1533 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1534 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1535 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1536 cum++; 1537 } 1538 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1539 /* shift for next computation */ 1540 cumarray += subset_size*subset_size; 1541 } 1542 if (pcbddc->dbg_flag) { 1543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1544 } 1545 1546 if (mss) { 1547 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1548 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1549 /* destroy matrices (junk) */ 1550 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1551 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1552 } 1553 if (allocated_S_St) { 1554 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1555 } 1556 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1557 #if defined(PETSC_USE_COMPLEX) 1558 ierr = PetscFree(rwork);CHKERRQ(ierr); 1559 #endif 1560 if (pcbddc->dbg_flag) { 1561 PetscInt maxneigs_r; 1562 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "PCBDDCSetUpSolvers" 1570 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1571 { 1572 PetscScalar *coarse_submat_vals; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1577 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1578 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1579 1580 /* Setup local neumann solver ksp_R */ 1581 /* PCBDDCSetUpLocalScatters should be called first! */ 1582 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1583 1584 /* 1585 Setup local correction and local part of coarse basis. 1586 Gives back the dense local part of the coarse matrix in column major ordering 1587 */ 1588 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1589 1590 /* Compute total number of coarse nodes and setup coarse solver */ 1591 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1592 1593 /* free */ 1594 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "PCBDDCResetCustomization" 1600 PetscErrorCode PCBDDCResetCustomization(PC pc) 1601 { 1602 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1603 PetscErrorCode ierr; 1604 1605 PetscFunctionBegin; 1606 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1607 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1608 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1609 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1610 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1611 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1612 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1613 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1614 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1615 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1616 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "PCBDDCResetTopography" 1622 PetscErrorCode PCBDDCResetTopography(PC pc) 1623 { 1624 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1625 PetscInt i; 1626 PetscErrorCode ierr; 1627 1628 PetscFunctionBegin; 1629 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1630 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1631 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1632 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1633 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1634 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1635 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1636 for (i=0;i<pcbddc->n_local_subs;i++) { 1637 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1638 } 1639 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1640 if (pcbddc->sub_schurs) { 1641 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "PCBDDCResetSolvers" 1648 PetscErrorCode PCBDDCResetSolvers(PC pc) 1649 { 1650 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1655 if (pcbddc->coarse_phi_B) { 1656 PetscScalar *array; 1657 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1658 ierr = PetscFree(array);CHKERRQ(ierr); 1659 } 1660 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1661 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1662 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1663 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1664 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1665 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1666 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1667 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1668 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1669 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1670 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1671 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1672 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1673 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1674 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1675 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1676 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1677 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1678 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1679 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1680 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1681 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1683 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1684 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1685 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1686 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1687 if (pcbddc->benign_zerodiag_subs) { 1688 PetscInt i; 1689 for (i=0;i<pcbddc->benign_n;i++) { 1690 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1691 } 1692 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1700 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1701 { 1702 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1703 PC_IS *pcis = (PC_IS*)pc->data; 1704 VecType impVecType; 1705 PetscInt n_constraints,n_R,old_size; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1710 /* get sizes */ 1711 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1712 n_R = pcis->n - pcbddc->n_vertices; 1713 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1714 /* local work vectors (try to avoid unneeded work)*/ 1715 /* R nodes */ 1716 old_size = -1; 1717 if (pcbddc->vec1_R) { 1718 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1719 } 1720 if (n_R != old_size) { 1721 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1722 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1723 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1724 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1725 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1726 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1727 } 1728 /* local primal dofs */ 1729 old_size = -1; 1730 if (pcbddc->vec1_P) { 1731 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1732 } 1733 if (pcbddc->local_primal_size != old_size) { 1734 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1735 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1736 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1737 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1738 } 1739 /* local explicit constraints */ 1740 old_size = -1; 1741 if (pcbddc->vec1_C) { 1742 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1743 } 1744 if (n_constraints && n_constraints != old_size) { 1745 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1746 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1747 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1748 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "PCBDDCSetUpCorrection" 1755 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1756 { 1757 PetscErrorCode ierr; 1758 /* pointers to pcis and pcbddc */ 1759 PC_IS* pcis = (PC_IS*)pc->data; 1760 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1761 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1762 /* submatrices of local problem */ 1763 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1764 /* submatrices of local coarse problem */ 1765 Mat S_VV,S_CV,S_VC,S_CC; 1766 /* working matrices */ 1767 Mat C_CR; 1768 /* additional working stuff */ 1769 PC pc_R; 1770 Mat F; 1771 Vec dummy_vec; 1772 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1773 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1774 PetscScalar *work; 1775 PetscInt *idx_V_B; 1776 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1777 PetscInt i,n_R,n_D,n_B; 1778 1779 /* some shortcuts to scalars */ 1780 PetscScalar one=1.0,m_one=-1.0; 1781 1782 PetscFunctionBegin; 1783 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1784 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1785 } 1786 1787 /* Set Non-overlapping dimensions */ 1788 n_vertices = pcbddc->n_vertices; 1789 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1790 n_B = pcis->n_B; 1791 n_D = pcis->n - n_B; 1792 n_R = pcis->n - n_vertices; 1793 1794 /* vertices in boundary numbering */ 1795 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1796 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1797 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1798 1799 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1800 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1802 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1803 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1804 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1806 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1808 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1809 1810 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1811 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1812 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1813 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1814 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1815 lda_rhs = n_R; 1816 need_benign_correction = PETSC_FALSE; 1817 if (isLU || isILU || isCHOL) { 1818 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1819 } else if (sub_schurs && sub_schurs->reuse_solver) { 1820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1821 MatFactorType type; 1822 1823 F = reuse_solver->F; 1824 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1825 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1826 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1827 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1828 } else { 1829 F = NULL; 1830 } 1831 1832 /* allocate workspace */ 1833 n = 0; 1834 if (n_constraints) { 1835 n += lda_rhs*n_constraints; 1836 } 1837 if (n_vertices) { 1838 n = PetscMax(2*lda_rhs*n_vertices,n); 1839 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1840 } 1841 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1842 1843 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1844 dummy_vec = NULL; 1845 if (need_benign_correction && lda_rhs != n_R && F) { 1846 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1847 } 1848 1849 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1850 if (n_constraints) { 1851 Mat M1,M2,M3,C_B; 1852 IS is_aux; 1853 PetscScalar *array,*array2; 1854 1855 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1856 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1857 1858 /* Extract constraints on R nodes: C_{CR} */ 1859 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1860 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1861 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1862 1863 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1864 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1865 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1866 for (i=0;i<n_constraints;i++) { 1867 const PetscScalar *row_cmat_values; 1868 const PetscInt *row_cmat_indices; 1869 PetscInt size_of_constraint,j; 1870 1871 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1872 for (j=0;j<size_of_constraint;j++) { 1873 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1874 } 1875 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1876 } 1877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1878 if (F) { 1879 Mat B; 1880 1881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1882 if (need_benign_correction) { 1883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1884 1885 /* rhs is already zero on interior dofs, no need to change the rhs */ 1886 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1887 } 1888 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1889 if (need_benign_correction) { 1890 PetscScalar *marr; 1891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1892 1893 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1894 if (lda_rhs != n_R) { 1895 for (i=0;i<n_constraints;i++) { 1896 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1897 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1898 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1899 } 1900 } else { 1901 for (i=0;i<n_constraints;i++) { 1902 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1903 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1904 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1905 } 1906 } 1907 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1908 } 1909 ierr = MatDestroy(&B);CHKERRQ(ierr); 1910 } else { 1911 PetscScalar *marr; 1912 1913 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1914 for (i=0;i<n_constraints;i++) { 1915 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1916 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1920 } 1921 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1922 } 1923 if (!pcbddc->switch_static) { 1924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1925 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1926 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1927 for (i=0;i<n_constraints;i++) { 1928 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1929 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1930 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1931 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1932 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1934 } 1935 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1936 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1937 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1938 } else { 1939 if (lda_rhs != n_R) { 1940 IS dummy; 1941 1942 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1943 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1944 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1945 } else { 1946 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1947 pcbddc->local_auxmat2 = local_auxmat2_R; 1948 } 1949 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1950 } 1951 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1952 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1953 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1954 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1955 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1956 if (isCHOL) { 1957 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1958 } else { 1959 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1960 } 1961 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1962 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1963 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1964 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1965 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1966 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1967 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1968 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1969 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1970 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1971 } 1972 1973 /* Get submatrices from subdomain matrix */ 1974 if (n_vertices) { 1975 IS is_aux; 1976 1977 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1978 IS tis; 1979 1980 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1981 ierr = ISSort(tis);CHKERRQ(ierr); 1982 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1983 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1984 } else { 1985 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1986 } 1987 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1988 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1989 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1990 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1991 } 1992 1993 /* Matrix of coarse basis functions (local) */ 1994 if (pcbddc->coarse_phi_B) { 1995 PetscInt on_B,on_primal,on_D=n_D; 1996 if (pcbddc->coarse_phi_D) { 1997 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1998 } 1999 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2000 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2001 PetscScalar *marray; 2002 2003 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2004 ierr = PetscFree(marray);CHKERRQ(ierr); 2005 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2006 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2007 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2008 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2009 } 2010 } 2011 2012 if (!pcbddc->coarse_phi_B) { 2013 PetscScalar *marray; 2014 2015 n = n_B*pcbddc->local_primal_size; 2016 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2017 n += n_D*pcbddc->local_primal_size; 2018 } 2019 if (!pcbddc->symmetric_primal) { 2020 n *= 2; 2021 } 2022 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2023 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2024 n = n_B*pcbddc->local_primal_size; 2025 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2027 n += n_D*pcbddc->local_primal_size; 2028 } 2029 if (!pcbddc->symmetric_primal) { 2030 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2031 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2032 n = n_B*pcbddc->local_primal_size; 2033 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2034 } 2035 } else { 2036 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2037 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2038 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2039 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2040 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2041 } 2042 } 2043 } 2044 2045 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2046 p0_lidx_I = NULL; 2047 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2048 const PetscInt *idxs; 2049 2050 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2051 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2052 for (i=0;i<pcbddc->benign_n;i++) { 2053 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2054 } 2055 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2056 } 2057 2058 /* vertices */ 2059 if (n_vertices) { 2060 2061 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2062 2063 if (n_R) { 2064 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2065 PetscBLASInt B_N,B_one = 1; 2066 PetscScalar *x,*y; 2067 PetscBool isseqaij; 2068 2069 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2070 if (need_benign_correction) { 2071 ISLocalToGlobalMapping RtoN; 2072 IS is_p0; 2073 PetscInt *idxs_p0,n; 2074 2075 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2076 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2077 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2078 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2079 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2080 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2081 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2082 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2083 } 2084 2085 if (lda_rhs == n_R) { 2086 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2087 } else { 2088 PetscScalar *av,*array; 2089 const PetscInt *xadj,*adjncy; 2090 PetscInt n; 2091 PetscBool flg_row; 2092 2093 array = work+lda_rhs*n_vertices; 2094 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2095 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2096 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2097 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2098 for (i=0;i<n;i++) { 2099 PetscInt j; 2100 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2101 } 2102 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2103 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2105 } 2106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2107 if (need_benign_correction) { 2108 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2109 PetscScalar *marr; 2110 2111 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2112 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2113 2114 | 0 0 0 | (V) 2115 L = | 0 0 -1 | (P-p0) 2116 | 0 0 -1 | (p0) 2117 2118 */ 2119 for (i=0;i<reuse_solver->benign_n;i++) { 2120 const PetscScalar *vals; 2121 const PetscInt *idxs,*idxs_zero; 2122 PetscInt n,j,nz; 2123 2124 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2125 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2126 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2127 for (j=0;j<n;j++) { 2128 PetscScalar val = vals[j]; 2129 PetscInt k,col = idxs[j]; 2130 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2131 } 2132 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2133 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2134 } 2135 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2136 } 2137 if (F) { 2138 /* need to correct the rhs */ 2139 if (need_benign_correction) { 2140 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2141 PetscScalar *marr; 2142 2143 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2144 if (lda_rhs != n_R) { 2145 for (i=0;i<n_vertices;i++) { 2146 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2147 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2148 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2149 } 2150 } else { 2151 for (i=0;i<n_vertices;i++) { 2152 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2153 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2154 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2155 } 2156 } 2157 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2158 } 2159 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2160 /* need to correct the solution */ 2161 if (need_benign_correction) { 2162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2163 PetscScalar *marr; 2164 2165 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2166 if (lda_rhs != n_R) { 2167 for (i=0;i<n_vertices;i++) { 2168 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2169 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2170 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2171 } 2172 } else { 2173 for (i=0;i<n_vertices;i++) { 2174 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2175 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2177 } 2178 } 2179 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2180 } 2181 } else { 2182 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2183 for (i=0;i<n_vertices;i++) { 2184 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2185 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2186 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2188 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2189 } 2190 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2191 } 2192 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2193 /* S_VV and S_CV */ 2194 if (n_constraints) { 2195 Mat B; 2196 2197 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2198 for (i=0;i<n_vertices;i++) { 2199 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2200 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2201 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2202 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2204 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2205 } 2206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2207 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2208 ierr = MatDestroy(&B);CHKERRQ(ierr); 2209 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2210 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2211 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2212 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2213 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2214 ierr = MatDestroy(&B);CHKERRQ(ierr); 2215 } 2216 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2217 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2218 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2219 } 2220 if (lda_rhs != n_R) { 2221 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2222 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2223 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2224 } 2225 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2226 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2227 if (need_benign_correction) { 2228 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2229 PetscScalar *marr,*sums; 2230 2231 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2232 ierr = MatDenseGetArray(S_VVt,&marr); 2233 for (i=0;i<reuse_solver->benign_n;i++) { 2234 const PetscScalar *vals; 2235 const PetscInt *idxs,*idxs_zero; 2236 PetscInt n,j,nz; 2237 2238 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2239 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2240 for (j=0;j<n_vertices;j++) { 2241 PetscInt k; 2242 sums[j] = 0.; 2243 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2244 } 2245 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2246 for (j=0;j<n;j++) { 2247 PetscScalar val = vals[j]; 2248 PetscInt k; 2249 for (k=0;k<n_vertices;k++) { 2250 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2251 } 2252 } 2253 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2254 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2255 } 2256 ierr = PetscFree(sums);CHKERRQ(ierr); 2257 ierr = MatDenseRestoreArray(S_VVt,&marr); 2258 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2259 } 2260 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2261 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2262 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2263 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2264 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2265 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2266 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2267 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2268 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2269 } else { 2270 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2271 } 2272 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2273 2274 /* coarse basis functions */ 2275 for (i=0;i<n_vertices;i++) { 2276 PetscScalar *y; 2277 2278 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2279 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2280 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2281 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2282 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2283 y[n_B*i+idx_V_B[i]] = 1.0; 2284 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2285 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2286 2287 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2288 PetscInt j; 2289 2290 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2291 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2292 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2293 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2294 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2295 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2296 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2297 } 2298 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2299 } 2300 /* if n_R == 0 the object is not destroyed */ 2301 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2302 } 2303 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2304 2305 if (n_constraints) { 2306 Mat B; 2307 2308 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2309 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2310 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2311 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2312 if (n_vertices) { 2313 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2314 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2315 } else { 2316 Mat S_VCt; 2317 2318 if (lda_rhs != n_R) { 2319 ierr = MatDestroy(&B);CHKERRQ(ierr); 2320 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2321 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2322 } 2323 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2324 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2325 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2326 } 2327 } 2328 ierr = MatDestroy(&B);CHKERRQ(ierr); 2329 /* coarse basis functions */ 2330 for (i=0;i<n_constraints;i++) { 2331 PetscScalar *y; 2332 2333 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2334 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2335 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2336 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2338 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2339 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2340 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2341 PetscInt j; 2342 2343 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2344 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2345 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2346 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2347 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2348 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2349 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2350 } 2351 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2352 } 2353 } 2354 if (n_constraints) { 2355 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2356 } 2357 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2358 2359 /* coarse matrix entries relative to B_0 */ 2360 if (pcbddc->benign_n) { 2361 Mat B0_B,B0_BPHI; 2362 IS is_dummy; 2363 PetscScalar *data; 2364 PetscInt j; 2365 2366 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2367 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2368 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2369 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2370 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2371 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2372 for (j=0;j<pcbddc->benign_n;j++) { 2373 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2374 for (i=0;i<pcbddc->local_primal_size;i++) { 2375 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2376 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2377 } 2378 } 2379 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2380 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2381 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2382 } 2383 2384 /* compute other basis functions for non-symmetric problems */ 2385 if (!pcbddc->symmetric_primal) { 2386 Mat B_V=NULL,B_C=NULL; 2387 PetscScalar *marray; 2388 2389 if (n_constraints) { 2390 Mat S_CCT,C_CRT; 2391 2392 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2393 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2394 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2395 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2396 if (n_vertices) { 2397 Mat S_VCT; 2398 2399 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2400 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2401 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2402 } 2403 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2404 } else { 2405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2406 } 2407 if (n_vertices && n_R) { 2408 PetscScalar *av,*marray; 2409 const PetscInt *xadj,*adjncy; 2410 PetscInt n; 2411 PetscBool flg_row; 2412 2413 /* B_V = B_V - A_VR^T */ 2414 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2415 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2416 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2417 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2418 for (i=0;i<n;i++) { 2419 PetscInt j; 2420 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2421 } 2422 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2423 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2424 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2425 } 2426 2427 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2428 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2429 for (i=0;i<n_vertices;i++) { 2430 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2431 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2432 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2433 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2434 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2435 } 2436 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2437 if (B_C) { 2438 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2439 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2440 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2441 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2442 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2443 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2444 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2445 } 2446 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2447 } 2448 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2449 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2450 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2451 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2452 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2453 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2454 } 2455 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2456 /* coarse basis functions */ 2457 for (i=0;i<pcbddc->local_primal_size;i++) { 2458 PetscScalar *y; 2459 2460 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2461 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2462 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2463 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2464 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2465 if (i<n_vertices) { 2466 y[n_B*i+idx_V_B[i]] = 1.0; 2467 } 2468 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2469 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2470 2471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2472 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2473 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2474 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2475 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2476 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2477 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2478 } 2479 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2480 } 2481 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2482 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2483 } 2484 /* free memory */ 2485 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2486 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2487 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2488 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2489 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2490 ierr = PetscFree(work);CHKERRQ(ierr); 2491 if (n_vertices) { 2492 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2493 } 2494 if (n_constraints) { 2495 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2496 } 2497 /* Checking coarse_sub_mat and coarse basis functios */ 2498 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2499 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2500 if (pcbddc->dbg_flag) { 2501 Mat coarse_sub_mat; 2502 Mat AUXMAT,TM1,TM2,TM3,TM4; 2503 Mat coarse_phi_D,coarse_phi_B; 2504 Mat coarse_psi_D,coarse_psi_B; 2505 Mat A_II,A_BB,A_IB,A_BI; 2506 Mat C_B,CPHI; 2507 IS is_dummy; 2508 Vec mones; 2509 MatType checkmattype=MATSEQAIJ; 2510 PetscReal real_value; 2511 2512 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2513 Mat A; 2514 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2515 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2516 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2517 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2518 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2519 ierr = MatDestroy(&A);CHKERRQ(ierr); 2520 } else { 2521 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2522 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2523 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2524 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2525 } 2526 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2527 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2528 if (!pcbddc->symmetric_primal) { 2529 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2530 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2531 } 2532 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2533 2534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2537 if (!pcbddc->symmetric_primal) { 2538 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2539 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2540 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2541 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2542 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2543 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2544 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2545 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2546 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2547 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2548 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2549 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2550 } else { 2551 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2552 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2553 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2554 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2555 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2556 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2557 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2558 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2559 } 2560 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2561 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2562 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2563 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2564 if (pcbddc->benign_n) { 2565 Mat B0_B,B0_BPHI; 2566 PetscScalar *data,*data2; 2567 PetscInt j; 2568 2569 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2570 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2571 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2572 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2573 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2574 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2575 for (j=0;j<pcbddc->benign_n;j++) { 2576 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2577 for (i=0;i<pcbddc->local_primal_size;i++) { 2578 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2579 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2580 } 2581 } 2582 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2583 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2584 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2585 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2586 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2587 } 2588 #if 0 2589 { 2590 PetscViewer viewer; 2591 char filename[256]; 2592 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2593 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2594 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2595 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2596 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2597 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2598 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2599 if (save_change) { 2600 Mat phi_B; 2601 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2602 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2603 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2604 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2605 } else { 2606 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2607 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2608 } 2609 if (pcbddc->coarse_phi_D) { 2610 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2611 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2612 } 2613 if (pcbddc->coarse_psi_B) { 2614 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2615 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2616 } 2617 if (pcbddc->coarse_psi_D) { 2618 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2619 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2620 } 2621 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2622 } 2623 #endif 2624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2625 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2626 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2628 2629 /* check constraints */ 2630 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2631 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2632 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2633 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2634 } else { 2635 PetscScalar *data; 2636 Mat tmat; 2637 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2639 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2640 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2641 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2642 } 2643 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2644 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2645 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2646 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2648 if (!pcbddc->symmetric_primal) { 2649 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2650 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2651 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2652 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2654 } 2655 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2656 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2657 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2658 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2660 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2661 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2662 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2663 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2664 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2665 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2666 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2667 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2668 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2669 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2670 if (!pcbddc->symmetric_primal) { 2671 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2672 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2673 } 2674 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2675 } 2676 /* get back data */ 2677 *coarse_submat_vals_n = coarse_submat_vals; 2678 PetscFunctionReturn(0); 2679 } 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2683 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2684 { 2685 Mat *work_mat; 2686 IS isrow_s,iscol_s; 2687 PetscBool rsorted,csorted; 2688 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2689 PetscErrorCode ierr; 2690 2691 PetscFunctionBegin; 2692 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2693 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2694 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2695 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2696 2697 if (!rsorted) { 2698 const PetscInt *idxs; 2699 PetscInt *idxs_sorted,i; 2700 2701 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2702 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2703 for (i=0;i<rsize;i++) { 2704 idxs_perm_r[i] = i; 2705 } 2706 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2707 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2708 for (i=0;i<rsize;i++) { 2709 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2710 } 2711 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2712 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2713 } else { 2714 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2715 isrow_s = isrow; 2716 } 2717 2718 if (!csorted) { 2719 if (isrow == iscol) { 2720 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2721 iscol_s = isrow_s; 2722 } else { 2723 const PetscInt *idxs; 2724 PetscInt *idxs_sorted,i; 2725 2726 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2727 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2728 for (i=0;i<csize;i++) { 2729 idxs_perm_c[i] = i; 2730 } 2731 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2732 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2733 for (i=0;i<csize;i++) { 2734 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2735 } 2736 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2738 } 2739 } else { 2740 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2741 iscol_s = iscol; 2742 } 2743 2744 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2745 2746 if (!rsorted || !csorted) { 2747 Mat new_mat; 2748 IS is_perm_r,is_perm_c; 2749 2750 if (!rsorted) { 2751 PetscInt *idxs_r,i; 2752 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2753 for (i=0;i<rsize;i++) { 2754 idxs_r[idxs_perm_r[i]] = i; 2755 } 2756 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2757 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2758 } else { 2759 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2760 } 2761 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2762 2763 if (!csorted) { 2764 if (isrow_s == iscol_s) { 2765 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2766 is_perm_c = is_perm_r; 2767 } else { 2768 PetscInt *idxs_c,i; 2769 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2770 for (i=0;i<csize;i++) { 2771 idxs_c[idxs_perm_c[i]] = i; 2772 } 2773 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2774 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2775 } 2776 } else { 2777 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2778 } 2779 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2780 2781 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2782 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2783 work_mat[0] = new_mat; 2784 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2785 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2786 } 2787 2788 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2789 *B = work_mat[0]; 2790 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2791 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2792 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2798 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2799 { 2800 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2802 Mat new_mat; 2803 IS is_local,is_global; 2804 PetscInt local_size; 2805 PetscBool isseqaij; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2810 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2811 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2812 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2813 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2814 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2815 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2816 2817 /* check */ 2818 if (pcbddc->dbg_flag) { 2819 Vec x,x_change; 2820 PetscReal error; 2821 2822 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2823 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2824 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2825 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2826 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2827 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2828 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2831 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2832 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2833 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2834 ierr = VecDestroy(&x);CHKERRQ(ierr); 2835 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2836 } 2837 2838 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2839 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2840 if (isseqaij) { 2841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2842 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2843 } else { 2844 Mat work_mat; 2845 2846 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2847 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2848 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2849 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2850 } 2851 if (matis->A->symmetric_set) { 2852 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2853 #if !defined(PETSC_USE_COMPLEX) 2854 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2855 #endif 2856 } 2857 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2863 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2864 { 2865 PC_IS* pcis = (PC_IS*)(pc->data); 2866 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2867 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2868 PetscInt *idx_R_local=NULL; 2869 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2870 PetscInt vbs,bs; 2871 PetscBT bitmask=NULL; 2872 PetscErrorCode ierr; 2873 2874 PetscFunctionBegin; 2875 /* 2876 No need to setup local scatters if 2877 - primal space is unchanged 2878 AND 2879 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2880 AND 2881 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2882 */ 2883 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2884 PetscFunctionReturn(0); 2885 } 2886 /* destroy old objects */ 2887 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2888 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2889 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2890 /* Set Non-overlapping dimensions */ 2891 n_B = pcis->n_B; 2892 n_D = pcis->n - n_B; 2893 n_vertices = pcbddc->n_vertices; 2894 2895 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2896 2897 /* create auxiliary bitmask and allocate workspace */ 2898 if (!sub_schurs || !sub_schurs->reuse_solver) { 2899 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2900 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2901 for (i=0;i<n_vertices;i++) { 2902 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2903 } 2904 2905 for (i=0, n_R=0; i<pcis->n; i++) { 2906 if (!PetscBTLookup(bitmask,i)) { 2907 idx_R_local[n_R++] = i; 2908 } 2909 } 2910 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2911 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2912 2913 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2914 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2915 } 2916 2917 /* Block code */ 2918 vbs = 1; 2919 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2920 if (bs>1 && !(n_vertices%bs)) { 2921 PetscBool is_blocked = PETSC_TRUE; 2922 PetscInt *vary; 2923 if (!sub_schurs || !sub_schurs->reuse_solver) { 2924 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2925 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2926 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2927 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2928 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2929 for (i=0; i<pcis->n/bs; i++) { 2930 if (vary[i]!=0 && vary[i]!=bs) { 2931 is_blocked = PETSC_FALSE; 2932 break; 2933 } 2934 } 2935 ierr = PetscFree(vary);CHKERRQ(ierr); 2936 } else { 2937 /* Verify directly the R set */ 2938 for (i=0; i<n_R/bs; i++) { 2939 PetscInt j,node=idx_R_local[bs*i]; 2940 for (j=1; j<bs; j++) { 2941 if (node != idx_R_local[bs*i+j]-j) { 2942 is_blocked = PETSC_FALSE; 2943 break; 2944 } 2945 } 2946 } 2947 } 2948 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2949 vbs = bs; 2950 for (i=0;i<n_R/vbs;i++) { 2951 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2952 } 2953 } 2954 } 2955 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2956 if (sub_schurs && sub_schurs->reuse_solver) { 2957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2958 2959 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2960 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2961 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2962 reuse_solver->is_R = pcbddc->is_R_local; 2963 } else { 2964 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2965 } 2966 2967 /* print some info if requested */ 2968 if (pcbddc->dbg_flag) { 2969 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2971 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2976 } 2977 2978 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2979 if (!sub_schurs || !sub_schurs->reuse_solver) { 2980 IS is_aux1,is_aux2; 2981 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2982 2983 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2984 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2985 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2986 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2987 for (i=0; i<n_D; i++) { 2988 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2989 } 2990 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2991 for (i=0, j=0; i<n_R; i++) { 2992 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2993 aux_array1[j++] = i; 2994 } 2995 } 2996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2997 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2998 for (i=0, j=0; i<n_B; i++) { 2999 if (!PetscBTLookup(bitmask,is_indices[i])) { 3000 aux_array2[j++] = i; 3001 } 3002 } 3003 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3004 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3005 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3006 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3007 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3008 3009 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3010 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3011 for (i=0, j=0; i<n_R; i++) { 3012 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3013 aux_array1[j++] = i; 3014 } 3015 } 3016 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3017 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3018 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3019 } 3020 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3021 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3022 } else { 3023 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3024 IS tis; 3025 PetscInt schur_size; 3026 3027 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3028 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3029 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3030 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3031 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3032 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3033 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3034 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3035 } 3036 } 3037 PetscFunctionReturn(0); 3038 } 3039 3040 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3043 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3044 { 3045 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3046 PC_IS *pcis = (PC_IS*)pc->data; 3047 PC pc_temp; 3048 Mat A_RR; 3049 MatReuse reuse; 3050 PetscScalar m_one = -1.0; 3051 PetscReal value; 3052 PetscInt n_D,n_R; 3053 PetscBool check_corr[2],issbaij; 3054 PetscErrorCode ierr; 3055 /* prefixes stuff */ 3056 char dir_prefix[256],neu_prefix[256],str_level[16]; 3057 size_t len; 3058 3059 PetscFunctionBegin; 3060 3061 /* compute prefixes */ 3062 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3063 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3064 if (!pcbddc->current_level) { 3065 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3066 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3067 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3068 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3069 } else { 3070 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3071 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3072 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3073 len -= 15; /* remove "pc_bddc_coarse_" */ 3074 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3075 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3076 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3077 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3078 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3079 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3080 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3081 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3082 } 3083 3084 /* DIRICHLET PROBLEM */ 3085 if (dirichlet) { 3086 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3087 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3088 if (!sub_schurs || !sub_schurs->reuse_solver) { 3089 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3090 } 3091 if (pcbddc->dbg_flag) { 3092 Mat A_IIn; 3093 3094 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3095 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3096 pcis->A_II = A_IIn; 3097 } 3098 } 3099 if (pcbddc->local_mat->symmetric_set) { 3100 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3101 } 3102 /* Matrix for Dirichlet problem is pcis->A_II */ 3103 n_D = pcis->n - pcis->n_B; 3104 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3105 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3106 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3107 /* default */ 3108 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3109 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3110 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3112 if (issbaij) { 3113 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3114 } else { 3115 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3116 } 3117 /* Allow user's customization */ 3118 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3119 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3120 } 3121 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3122 if (sub_schurs && sub_schurs->reuse_solver) { 3123 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3124 3125 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3126 } 3127 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3128 if (!n_D) { 3129 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3130 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3131 } 3132 /* Set Up KSP for Dirichlet problem of BDDC */ 3133 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3134 /* set ksp_D into pcis data */ 3135 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3136 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3137 pcis->ksp_D = pcbddc->ksp_D; 3138 } 3139 3140 /* NEUMANN PROBLEM */ 3141 A_RR = 0; 3142 if (neumann) { 3143 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3144 PetscInt ibs,mbs; 3145 PetscBool issbaij; 3146 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3147 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3148 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3149 if (pcbddc->ksp_R) { /* already created ksp */ 3150 PetscInt nn_R; 3151 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3152 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3153 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3154 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3155 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3156 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3157 reuse = MAT_INITIAL_MATRIX; 3158 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3159 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3160 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3161 reuse = MAT_INITIAL_MATRIX; 3162 } else { /* safe to reuse the matrix */ 3163 reuse = MAT_REUSE_MATRIX; 3164 } 3165 } 3166 /* last check */ 3167 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3168 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3169 reuse = MAT_INITIAL_MATRIX; 3170 } 3171 } else { /* first time, so we need to create the matrix */ 3172 reuse = MAT_INITIAL_MATRIX; 3173 } 3174 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3175 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3176 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3177 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3178 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3179 if (matis->A == pcbddc->local_mat) { 3180 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3181 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3182 } else { 3183 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3184 } 3185 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3186 if (matis->A == pcbddc->local_mat) { 3187 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3188 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3189 } else { 3190 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3191 } 3192 } 3193 /* extract A_RR */ 3194 if (sub_schurs && sub_schurs->reuse_solver) { 3195 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3196 3197 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3198 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3199 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3200 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3201 } else { 3202 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3203 } 3204 } else { 3205 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3206 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3207 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3208 } 3209 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3210 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3211 } 3212 if (pcbddc->local_mat->symmetric_set) { 3213 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3214 } 3215 if (!pcbddc->ksp_R) { /* create object if not present */ 3216 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3217 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3218 /* default */ 3219 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3220 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3221 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3222 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3223 if (issbaij) { 3224 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3225 } else { 3226 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3227 } 3228 /* Allow user's customization */ 3229 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3230 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3231 } 3232 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3233 if (!n_R) { 3234 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3235 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3236 } 3237 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3238 /* Reuse solver if it is present */ 3239 if (sub_schurs && sub_schurs->reuse_solver) { 3240 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3241 3242 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3243 } 3244 /* Set Up KSP for Neumann problem of BDDC */ 3245 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3246 } 3247 3248 if (pcbddc->dbg_flag) { 3249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3250 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3252 } 3253 3254 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3255 check_corr[0] = check_corr[1] = PETSC_FALSE; 3256 if (pcbddc->NullSpace_corr[0]) { 3257 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3258 } 3259 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3260 check_corr[0] = PETSC_TRUE; 3261 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3262 } 3263 if (neumann && pcbddc->NullSpace_corr[2]) { 3264 check_corr[1] = PETSC_TRUE; 3265 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3266 } 3267 3268 /* check Dirichlet and Neumann solvers */ 3269 if (pcbddc->dbg_flag) { 3270 if (dirichlet) { /* Dirichlet */ 3271 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3272 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3273 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3274 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3275 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3277 if (check_corr[0]) { 3278 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3279 } 3280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3281 } 3282 if (neumann) { /* Neumann */ 3283 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3284 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3285 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3286 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3287 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3289 if (check_corr[1]) { 3290 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3291 } 3292 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3293 } 3294 } 3295 /* free Neumann problem's matrix */ 3296 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3302 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3303 { 3304 PetscErrorCode ierr; 3305 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3306 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3307 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3308 3309 PetscFunctionBegin; 3310 if (!reuse_solver) { 3311 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3312 } 3313 if (!pcbddc->switch_static) { 3314 if (applytranspose && pcbddc->local_auxmat1) { 3315 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3316 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3317 } 3318 if (!reuse_solver) { 3319 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3320 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3321 } else { 3322 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3323 3324 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3325 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3326 } 3327 } else { 3328 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3329 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3330 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3331 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3332 if (applytranspose && pcbddc->local_auxmat1) { 3333 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3334 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3335 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3336 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3337 } 3338 } 3339 if (!reuse_solver || pcbddc->switch_static) { 3340 if (applytranspose) { 3341 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3342 } else { 3343 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3344 } 3345 } else { 3346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3347 3348 if (applytranspose) { 3349 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3350 } else { 3351 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3352 } 3353 } 3354 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3355 if (!pcbddc->switch_static) { 3356 if (!reuse_solver) { 3357 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 } else { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 3362 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3363 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 } 3365 if (!applytranspose && pcbddc->local_auxmat1) { 3366 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3367 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3368 } 3369 } else { 3370 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3371 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3372 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3374 if (!applytranspose && pcbddc->local_auxmat1) { 3375 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3376 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3377 } 3378 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3379 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3380 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3381 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3382 } 3383 PetscFunctionReturn(0); 3384 } 3385 3386 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3389 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3390 { 3391 PetscErrorCode ierr; 3392 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3393 PC_IS* pcis = (PC_IS*) (pc->data); 3394 const PetscScalar zero = 0.0; 3395 3396 PetscFunctionBegin; 3397 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3398 if (!pcbddc->benign_apply_coarse_only) { 3399 if (applytranspose) { 3400 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3401 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3402 } else { 3403 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3404 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3405 } 3406 } else { 3407 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3408 } 3409 3410 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3411 if (pcbddc->benign_n) { 3412 PetscScalar *array; 3413 PetscInt j; 3414 3415 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3416 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3417 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3418 } 3419 3420 /* start communications from local primal nodes to rhs of coarse solver */ 3421 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3422 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3424 3425 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3426 if (pcbddc->coarse_ksp) { 3427 Mat coarse_mat; 3428 Vec rhs,sol; 3429 MatNullSpace nullsp; 3430 PetscBool isbddc = PETSC_FALSE; 3431 3432 if (pcbddc->benign_have_null) { 3433 PC coarse_pc; 3434 3435 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3436 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3437 /* we need to propagate to coarser levels the need for a possible benign correction */ 3438 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3439 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3440 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3441 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3442 } 3443 } 3444 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3445 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3446 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3447 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3448 if (nullsp) { 3449 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3450 } 3451 if (applytranspose) { 3452 if (pcbddc->benign_apply_coarse_only) { 3453 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3454 } else { 3455 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3456 } 3457 } else { 3458 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3459 PC coarse_pc; 3460 3461 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3462 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3463 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3464 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3465 } else { 3466 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3467 } 3468 } 3469 /* we don't need the benign correction at coarser levels anymore */ 3470 if (pcbddc->benign_have_null && isbddc) { 3471 PC coarse_pc; 3472 PC_BDDC* coarsepcbddc; 3473 3474 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3475 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3476 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3477 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3478 } 3479 if (nullsp) { 3480 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3481 } 3482 } 3483 3484 /* Local solution on R nodes */ 3485 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3486 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3487 } 3488 /* communications from coarse sol to local primal nodes */ 3489 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3490 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3491 3492 /* Sum contributions from the two levels */ 3493 if (!pcbddc->benign_apply_coarse_only) { 3494 if (applytranspose) { 3495 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3496 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3497 } else { 3498 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3499 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3500 } 3501 /* store p0 */ 3502 if (pcbddc->benign_n) { 3503 PetscScalar *array; 3504 PetscInt j; 3505 3506 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3507 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3508 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3509 } 3510 } else { /* expand the coarse solution */ 3511 if (applytranspose) { 3512 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3513 } else { 3514 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3515 } 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3522 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3523 { 3524 PetscErrorCode ierr; 3525 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3526 PetscScalar *array; 3527 Vec from,to; 3528 3529 PetscFunctionBegin; 3530 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3531 from = pcbddc->coarse_vec; 3532 to = pcbddc->vec1_P; 3533 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3534 Vec tvec; 3535 3536 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3537 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3538 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3539 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3540 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3541 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3542 } 3543 } else { /* from local to global -> put data in coarse right hand side */ 3544 from = pcbddc->vec1_P; 3545 to = pcbddc->coarse_vec; 3546 } 3547 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3548 PetscFunctionReturn(0); 3549 } 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3553 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3554 { 3555 PetscErrorCode ierr; 3556 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3557 PetscScalar *array; 3558 Vec from,to; 3559 3560 PetscFunctionBegin; 3561 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3562 from = pcbddc->coarse_vec; 3563 to = pcbddc->vec1_P; 3564 } else { /* from local to global -> put data in coarse right hand side */ 3565 from = pcbddc->vec1_P; 3566 to = pcbddc->coarse_vec; 3567 } 3568 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3569 if (smode == SCATTER_FORWARD) { 3570 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3571 Vec tvec; 3572 3573 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3574 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3575 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3576 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3577 } 3578 } else { 3579 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3580 ierr = VecResetArray(from);CHKERRQ(ierr); 3581 } 3582 } 3583 PetscFunctionReturn(0); 3584 } 3585 3586 /* uncomment for testing purposes */ 3587 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3588 #undef __FUNCT__ 3589 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3590 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3591 { 3592 PetscErrorCode ierr; 3593 PC_IS* pcis = (PC_IS*)(pc->data); 3594 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3595 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3596 /* one and zero */ 3597 PetscScalar one=1.0,zero=0.0; 3598 /* space to store constraints and their local indices */ 3599 PetscScalar *constraints_data; 3600 PetscInt *constraints_idxs,*constraints_idxs_B; 3601 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3602 PetscInt *constraints_n; 3603 /* iterators */ 3604 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3605 /* BLAS integers */ 3606 PetscBLASInt lwork,lierr; 3607 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3608 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3609 /* reuse */ 3610 PetscInt olocal_primal_size,olocal_primal_size_cc; 3611 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3612 /* change of basis */ 3613 PetscBool qr_needed; 3614 PetscBT change_basis,qr_needed_idx; 3615 /* auxiliary stuff */ 3616 PetscInt *nnz,*is_indices; 3617 PetscInt ncc; 3618 /* some quantities */ 3619 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3620 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3621 3622 PetscFunctionBegin; 3623 /* Destroy Mat objects computed previously */ 3624 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3625 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3626 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3627 /* save info on constraints from previous setup (if any) */ 3628 olocal_primal_size = pcbddc->local_primal_size; 3629 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3630 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3631 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3632 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3633 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3634 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3635 3636 if (!pcbddc->adaptive_selection) { 3637 IS ISForVertices,*ISForFaces,*ISForEdges; 3638 MatNullSpace nearnullsp; 3639 const Vec *nearnullvecs; 3640 Vec *localnearnullsp; 3641 PetscScalar *array; 3642 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3643 PetscBool nnsp_has_cnst; 3644 /* LAPACK working arrays for SVD or POD */ 3645 PetscBool skip_lapack,boolforchange; 3646 PetscScalar *work; 3647 PetscReal *singular_vals; 3648 #if defined(PETSC_USE_COMPLEX) 3649 PetscReal *rwork; 3650 #endif 3651 #if defined(PETSC_MISSING_LAPACK_GESVD) 3652 PetscScalar *temp_basis,*correlation_mat; 3653 #else 3654 PetscBLASInt dummy_int=1; 3655 PetscScalar dummy_scalar=1.; 3656 #endif 3657 3658 /* Get index sets for faces, edges and vertices from graph */ 3659 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3660 /* print some info */ 3661 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3662 PetscInt nv; 3663 3664 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3665 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3666 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3672 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3673 } 3674 3675 /* free unneeded index sets */ 3676 if (!pcbddc->use_vertices) { 3677 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3678 } 3679 if (!pcbddc->use_edges) { 3680 for (i=0;i<n_ISForEdges;i++) { 3681 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3682 } 3683 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3684 n_ISForEdges = 0; 3685 } 3686 if (!pcbddc->use_faces) { 3687 for (i=0;i<n_ISForFaces;i++) { 3688 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3689 } 3690 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3691 n_ISForFaces = 0; 3692 } 3693 3694 /* check if near null space is attached to global mat */ 3695 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3696 if (nearnullsp) { 3697 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3698 /* remove any stored info */ 3699 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3700 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3701 /* store information for BDDC solver reuse */ 3702 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3703 pcbddc->onearnullspace = nearnullsp; 3704 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3705 for (i=0;i<nnsp_size;i++) { 3706 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3707 } 3708 } else { /* if near null space is not provided BDDC uses constants by default */ 3709 nnsp_size = 0; 3710 nnsp_has_cnst = PETSC_TRUE; 3711 } 3712 /* get max number of constraints on a single cc */ 3713 max_constraints = nnsp_size; 3714 if (nnsp_has_cnst) max_constraints++; 3715 3716 /* 3717 Evaluate maximum storage size needed by the procedure 3718 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3719 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3720 There can be multiple constraints per connected component 3721 */ 3722 n_vertices = 0; 3723 if (ISForVertices) { 3724 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3725 } 3726 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3727 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3728 3729 total_counts = n_ISForFaces+n_ISForEdges; 3730 total_counts *= max_constraints; 3731 total_counts += n_vertices; 3732 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3733 3734 total_counts = 0; 3735 max_size_of_constraint = 0; 3736 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3737 IS used_is; 3738 if (i<n_ISForEdges) { 3739 used_is = ISForEdges[i]; 3740 } else { 3741 used_is = ISForFaces[i-n_ISForEdges]; 3742 } 3743 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3744 total_counts += j; 3745 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3746 } 3747 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3748 3749 /* get local part of global near null space vectors */ 3750 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3751 for (k=0;k<nnsp_size;k++) { 3752 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3753 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3754 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3755 } 3756 3757 /* whether or not to skip lapack calls */ 3758 skip_lapack = PETSC_TRUE; 3759 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3760 3761 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3762 if (!skip_lapack) { 3763 PetscScalar temp_work; 3764 3765 #if defined(PETSC_MISSING_LAPACK_GESVD) 3766 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3767 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3768 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3769 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3770 #if defined(PETSC_USE_COMPLEX) 3771 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3772 #endif 3773 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3774 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3775 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3776 lwork = -1; 3777 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3778 #if !defined(PETSC_USE_COMPLEX) 3779 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3780 #else 3781 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3782 #endif 3783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3785 #else /* on missing GESVD */ 3786 /* SVD */ 3787 PetscInt max_n,min_n; 3788 max_n = max_size_of_constraint; 3789 min_n = max_constraints; 3790 if (max_size_of_constraint < max_constraints) { 3791 min_n = max_size_of_constraint; 3792 max_n = max_constraints; 3793 } 3794 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3795 #if defined(PETSC_USE_COMPLEX) 3796 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3797 #endif 3798 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3799 lwork = -1; 3800 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3801 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3802 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3804 #if !defined(PETSC_USE_COMPLEX) 3805 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3806 #else 3807 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3808 #endif 3809 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3811 #endif /* on missing GESVD */ 3812 /* Allocate optimal workspace */ 3813 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3814 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3815 } 3816 /* Now we can loop on constraining sets */ 3817 total_counts = 0; 3818 constraints_idxs_ptr[0] = 0; 3819 constraints_data_ptr[0] = 0; 3820 /* vertices */ 3821 if (n_vertices) { 3822 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3823 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3824 for (i=0;i<n_vertices;i++) { 3825 constraints_n[total_counts] = 1; 3826 constraints_data[total_counts] = 1.0; 3827 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3828 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3829 total_counts++; 3830 } 3831 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3832 n_vertices = total_counts; 3833 } 3834 3835 /* edges and faces */ 3836 total_counts_cc = total_counts; 3837 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3838 IS used_is; 3839 PetscBool idxs_copied = PETSC_FALSE; 3840 3841 if (ncc<n_ISForEdges) { 3842 used_is = ISForEdges[ncc]; 3843 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3844 } else { 3845 used_is = ISForFaces[ncc-n_ISForEdges]; 3846 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3847 } 3848 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3849 3850 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3851 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3852 /* change of basis should not be performed on local periodic nodes */ 3853 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3854 if (nnsp_has_cnst) { 3855 PetscScalar quad_value; 3856 3857 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3858 idxs_copied = PETSC_TRUE; 3859 3860 if (!pcbddc->use_nnsp_true) { 3861 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3862 } else { 3863 quad_value = 1.0; 3864 } 3865 for (j=0;j<size_of_constraint;j++) { 3866 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3867 } 3868 temp_constraints++; 3869 total_counts++; 3870 } 3871 for (k=0;k<nnsp_size;k++) { 3872 PetscReal real_value; 3873 PetscScalar *ptr_to_data; 3874 3875 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3876 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3877 for (j=0;j<size_of_constraint;j++) { 3878 ptr_to_data[j] = array[is_indices[j]]; 3879 } 3880 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3881 /* check if array is null on the connected component */ 3882 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3883 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3884 if (real_value > 0.0) { /* keep indices and values */ 3885 temp_constraints++; 3886 total_counts++; 3887 if (!idxs_copied) { 3888 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3889 idxs_copied = PETSC_TRUE; 3890 } 3891 } 3892 } 3893 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3894 valid_constraints = temp_constraints; 3895 if (!pcbddc->use_nnsp_true && temp_constraints) { 3896 if (temp_constraints == 1) { /* just normalize the constraint */ 3897 PetscScalar norm,*ptr_to_data; 3898 3899 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3901 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3902 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3903 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3904 } else { /* perform SVD */ 3905 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3906 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3907 3908 #if defined(PETSC_MISSING_LAPACK_GESVD) 3909 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3910 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3911 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3912 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3913 from that computed using LAPACKgesvd 3914 -> This is due to a different computation of eigenvectors in LAPACKheev 3915 -> The quality of the POD-computed basis will be the same */ 3916 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3917 /* Store upper triangular part of correlation matrix */ 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3920 for (j=0;j<temp_constraints;j++) { 3921 for (k=0;k<j+1;k++) { 3922 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3923 } 3924 } 3925 /* compute eigenvalues and eigenvectors of correlation matrix */ 3926 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3927 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3928 #if !defined(PETSC_USE_COMPLEX) 3929 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3930 #else 3931 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3932 #endif 3933 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3935 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3936 j = 0; 3937 while (j < temp_constraints && singular_vals[j] < tol) j++; 3938 total_counts = total_counts-j; 3939 valid_constraints = temp_constraints-j; 3940 /* scale and copy POD basis into used quadrature memory */ 3941 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3947 if (j<temp_constraints) { 3948 PetscInt ii; 3949 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3950 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3951 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3953 for (k=0;k<temp_constraints-j;k++) { 3954 for (ii=0;ii<size_of_constraint;ii++) { 3955 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3956 } 3957 } 3958 } 3959 #else /* on missing GESVD */ 3960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3961 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3964 #if !defined(PETSC_USE_COMPLEX) 3965 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3966 #else 3967 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3968 #endif 3969 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3971 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3972 k = temp_constraints; 3973 if (k > size_of_constraint) k = size_of_constraint; 3974 j = 0; 3975 while (j < k && singular_vals[k-j-1] < tol) j++; 3976 valid_constraints = k-j; 3977 total_counts = total_counts-temp_constraints+valid_constraints; 3978 #endif /* on missing GESVD */ 3979 } 3980 } 3981 /* update pointers information */ 3982 if (valid_constraints) { 3983 constraints_n[total_counts_cc] = valid_constraints; 3984 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3985 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3986 /* set change_of_basis flag */ 3987 if (boolforchange) { 3988 PetscBTSet(change_basis,total_counts_cc); 3989 } 3990 total_counts_cc++; 3991 } 3992 } 3993 /* free workspace */ 3994 if (!skip_lapack) { 3995 ierr = PetscFree(work);CHKERRQ(ierr); 3996 #if defined(PETSC_USE_COMPLEX) 3997 ierr = PetscFree(rwork);CHKERRQ(ierr); 3998 #endif 3999 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4000 #if defined(PETSC_MISSING_LAPACK_GESVD) 4001 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4002 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4003 #endif 4004 } 4005 for (k=0;k<nnsp_size;k++) { 4006 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4007 } 4008 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4009 /* free index sets of faces, edges and vertices */ 4010 for (i=0;i<n_ISForFaces;i++) { 4011 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4012 } 4013 if (n_ISForFaces) { 4014 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4015 } 4016 for (i=0;i<n_ISForEdges;i++) { 4017 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4018 } 4019 if (n_ISForEdges) { 4020 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4021 } 4022 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4023 } else { 4024 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4025 4026 total_counts = 0; 4027 n_vertices = 0; 4028 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4029 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4030 } 4031 max_constraints = 0; 4032 total_counts_cc = 0; 4033 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4034 total_counts += pcbddc->adaptive_constraints_n[i]; 4035 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4036 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4037 } 4038 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4039 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4040 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4041 constraints_data = pcbddc->adaptive_constraints_data; 4042 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4043 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4044 total_counts_cc = 0; 4045 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4046 if (pcbddc->adaptive_constraints_n[i]) { 4047 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4048 } 4049 } 4050 #if 0 4051 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4052 for (i=0;i<total_counts_cc;i++) { 4053 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4054 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4055 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4056 printf(" %d",constraints_idxs[j]); 4057 } 4058 printf("\n"); 4059 printf("number of cc: %d\n",constraints_n[i]); 4060 } 4061 for (i=0;i<n_vertices;i++) { 4062 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4063 } 4064 for (i=0;i<sub_schurs->n_subs;i++) { 4065 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4066 } 4067 #endif 4068 4069 max_size_of_constraint = 0; 4070 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4071 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4072 /* Change of basis */ 4073 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4074 if (pcbddc->use_change_of_basis) { 4075 for (i=0;i<sub_schurs->n_subs;i++) { 4076 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4077 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4078 } 4079 } 4080 } 4081 } 4082 pcbddc->local_primal_size = total_counts; 4083 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4084 4085 /* map constraints_idxs in boundary numbering */ 4086 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4087 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4088 4089 /* Create constraint matrix */ 4090 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4091 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4092 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4093 4094 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4095 /* determine if a QR strategy is needed for change of basis */ 4096 qr_needed = PETSC_FALSE; 4097 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4098 total_primal_vertices=0; 4099 pcbddc->local_primal_size_cc = 0; 4100 for (i=0;i<total_counts_cc;i++) { 4101 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4102 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4103 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4104 pcbddc->local_primal_size_cc += 1; 4105 } else if (PetscBTLookup(change_basis,i)) { 4106 for (k=0;k<constraints_n[i];k++) { 4107 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4108 } 4109 pcbddc->local_primal_size_cc += constraints_n[i]; 4110 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4111 PetscBTSet(qr_needed_idx,i); 4112 qr_needed = PETSC_TRUE; 4113 } 4114 } else { 4115 pcbddc->local_primal_size_cc += 1; 4116 } 4117 } 4118 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4119 pcbddc->n_vertices = total_primal_vertices; 4120 /* permute indices in order to have a sorted set of vertices */ 4121 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4122 4123 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4124 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4125 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4126 4127 /* nonzero structure of constraint matrix */ 4128 /* and get reference dof for local constraints */ 4129 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4130 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4131 4132 j = total_primal_vertices; 4133 total_counts = total_primal_vertices; 4134 cum = total_primal_vertices; 4135 for (i=n_vertices;i<total_counts_cc;i++) { 4136 if (!PetscBTLookup(change_basis,i)) { 4137 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4138 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4139 cum++; 4140 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4141 for (k=0;k<constraints_n[i];k++) { 4142 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4143 nnz[j+k] = size_of_constraint; 4144 } 4145 j += constraints_n[i]; 4146 } 4147 } 4148 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4149 ierr = PetscFree(nnz);CHKERRQ(ierr); 4150 4151 /* set values in constraint matrix */ 4152 for (i=0;i<total_primal_vertices;i++) { 4153 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4154 } 4155 total_counts = total_primal_vertices; 4156 for (i=n_vertices;i<total_counts_cc;i++) { 4157 if (!PetscBTLookup(change_basis,i)) { 4158 PetscInt *cols; 4159 4160 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4161 cols = constraints_idxs+constraints_idxs_ptr[i]; 4162 for (k=0;k<constraints_n[i];k++) { 4163 PetscInt row = total_counts+k; 4164 PetscScalar *vals; 4165 4166 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4167 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4168 } 4169 total_counts += constraints_n[i]; 4170 } 4171 } 4172 /* assembling */ 4173 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4174 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4175 4176 /* 4177 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4178 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4179 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4180 */ 4181 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4182 if (pcbddc->use_change_of_basis) { 4183 /* dual and primal dofs on a single cc */ 4184 PetscInt dual_dofs,primal_dofs; 4185 /* working stuff for GEQRF */ 4186 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4187 PetscBLASInt lqr_work; 4188 /* working stuff for UNGQR */ 4189 PetscScalar *gqr_work,lgqr_work_t; 4190 PetscBLASInt lgqr_work; 4191 /* working stuff for TRTRS */ 4192 PetscScalar *trs_rhs; 4193 PetscBLASInt Blas_NRHS; 4194 /* pointers for values insertion into change of basis matrix */ 4195 PetscInt *start_rows,*start_cols; 4196 PetscScalar *start_vals; 4197 /* working stuff for values insertion */ 4198 PetscBT is_primal; 4199 PetscInt *aux_primal_numbering_B; 4200 /* matrix sizes */ 4201 PetscInt global_size,local_size; 4202 /* temporary change of basis */ 4203 Mat localChangeOfBasisMatrix; 4204 /* extra space for debugging */ 4205 PetscScalar *dbg_work; 4206 4207 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4208 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4209 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4210 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4211 /* nonzeros for local mat */ 4212 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4213 if (!pcbddc->benign_change || pcbddc->fake_change) { 4214 for (i=0;i<pcis->n;i++) nnz[i]=1; 4215 } else { 4216 const PetscInt *ii; 4217 PetscInt n; 4218 PetscBool flg_row; 4219 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4220 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4221 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4222 } 4223 for (i=n_vertices;i<total_counts_cc;i++) { 4224 if (PetscBTLookup(change_basis,i)) { 4225 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4226 if (PetscBTLookup(qr_needed_idx,i)) { 4227 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4228 } else { 4229 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4230 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4231 } 4232 } 4233 } 4234 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4235 ierr = PetscFree(nnz);CHKERRQ(ierr); 4236 /* Set interior change in the matrix */ 4237 if (!pcbddc->benign_change || pcbddc->fake_change) { 4238 for (i=0;i<pcis->n;i++) { 4239 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4240 } 4241 } else { 4242 const PetscInt *ii,*jj; 4243 PetscScalar *aa; 4244 PetscInt n; 4245 PetscBool flg_row; 4246 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4247 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4248 for (i=0;i<n;i++) { 4249 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4250 } 4251 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4252 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4253 } 4254 4255 if (pcbddc->dbg_flag) { 4256 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4258 } 4259 4260 4261 /* Now we loop on the constraints which need a change of basis */ 4262 /* 4263 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4264 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4265 4266 Basic blocks of change of basis matrix T computed by 4267 4268 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4269 4270 | 1 0 ... 0 s_1/S | 4271 | 0 1 ... 0 s_2/S | 4272 | ... | 4273 | 0 ... 1 s_{n-1}/S | 4274 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4275 4276 with S = \sum_{i=1}^n s_i^2 4277 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4278 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4279 4280 - QR decomposition of constraints otherwise 4281 */ 4282 if (qr_needed) { 4283 /* space to store Q */ 4284 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4285 /* first we issue queries for optimal work */ 4286 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4288 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4289 lqr_work = -1; 4290 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4291 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4292 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4293 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4294 lgqr_work = -1; 4295 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4296 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4299 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4300 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4301 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4302 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4303 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4304 /* array to store scaling factors for reflectors */ 4305 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4306 /* array to store rhs and solution of triangular solver */ 4307 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4308 /* allocating workspace for check */ 4309 if (pcbddc->dbg_flag) { 4310 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4311 } 4312 } 4313 /* array to store whether a node is primal or not */ 4314 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4315 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4316 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4317 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4318 for (i=0;i<total_primal_vertices;i++) { 4319 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4320 } 4321 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4322 4323 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4324 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4325 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4326 if (PetscBTLookup(change_basis,total_counts)) { 4327 /* get constraint info */ 4328 primal_dofs = constraints_n[total_counts]; 4329 dual_dofs = size_of_constraint-primal_dofs; 4330 4331 if (pcbddc->dbg_flag) { 4332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4333 } 4334 4335 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4336 4337 /* copy quadrature constraints for change of basis check */ 4338 if (pcbddc->dbg_flag) { 4339 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4340 } 4341 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4342 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4343 4344 /* compute QR decomposition of constraints */ 4345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4348 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4349 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4350 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4352 4353 /* explictly compute R^-T */ 4354 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4355 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4356 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4357 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4358 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4359 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4360 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4361 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4362 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4363 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4364 4365 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4366 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4367 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4368 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4369 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4370 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4371 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4372 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4373 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4374 4375 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4376 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4377 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4379 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4380 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4383 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4384 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4385 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4386 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4387 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4388 4389 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4390 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4391 /* insert cols for primal dofs */ 4392 for (j=0;j<primal_dofs;j++) { 4393 start_vals = &qr_basis[j*size_of_constraint]; 4394 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4395 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4396 } 4397 /* insert cols for dual dofs */ 4398 for (j=0,k=0;j<dual_dofs;k++) { 4399 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4400 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4401 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4402 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4403 j++; 4404 } 4405 } 4406 4407 /* check change of basis */ 4408 if (pcbddc->dbg_flag) { 4409 PetscInt ii,jj; 4410 PetscBool valid_qr=PETSC_TRUE; 4411 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4414 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4418 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4420 for (jj=0;jj<size_of_constraint;jj++) { 4421 for (ii=0;ii<primal_dofs;ii++) { 4422 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4423 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4424 } 4425 } 4426 if (!valid_qr) { 4427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4428 for (jj=0;jj<size_of_constraint;jj++) { 4429 for (ii=0;ii<primal_dofs;ii++) { 4430 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4431 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4432 } 4433 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4434 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4435 } 4436 } 4437 } 4438 } else { 4439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4440 } 4441 } 4442 } else { /* simple transformation block */ 4443 PetscInt row,col; 4444 PetscScalar val,norm; 4445 4446 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4447 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4448 for (j=0;j<size_of_constraint;j++) { 4449 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4450 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4451 if (!PetscBTLookup(is_primal,row_B)) { 4452 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4453 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4454 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4455 } else { 4456 for (k=0;k<size_of_constraint;k++) { 4457 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4458 if (row != col) { 4459 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4460 } else { 4461 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4462 } 4463 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4464 } 4465 } 4466 } 4467 if (pcbddc->dbg_flag) { 4468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4469 } 4470 } 4471 } else { 4472 if (pcbddc->dbg_flag) { 4473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4474 } 4475 } 4476 } 4477 4478 /* free workspace */ 4479 if (qr_needed) { 4480 if (pcbddc->dbg_flag) { 4481 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4482 } 4483 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4484 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4485 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4486 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4487 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4488 } 4489 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4490 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4491 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4492 4493 /* assembling of global change of variable */ 4494 if (!pcbddc->fake_change) { 4495 Mat tmat; 4496 PetscInt bs; 4497 4498 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4499 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4500 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4501 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4502 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4503 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4504 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4505 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4506 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4507 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4508 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4509 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4511 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4512 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4513 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4514 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4515 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4516 4517 /* check */ 4518 if (pcbddc->dbg_flag) { 4519 PetscReal error; 4520 Vec x,x_change; 4521 4522 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4523 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4524 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4525 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4526 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4527 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4529 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4530 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4531 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4532 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4533 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4534 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4536 ierr = VecDestroy(&x);CHKERRQ(ierr); 4537 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4538 } 4539 /* adapt sub_schurs computed (if any) */ 4540 if (pcbddc->use_deluxe_scaling) { 4541 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4542 4543 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4544 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4545 } 4546 if (sub_schurs && sub_schurs->S_Ej_all) { 4547 Mat S_new,tmat; 4548 IS is_all_N,is_V_Sall = NULL; 4549 4550 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4551 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4552 if (pcbddc->deluxe_zerorows) { 4553 ISLocalToGlobalMapping NtoSall; 4554 IS is_V; 4555 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4556 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4557 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4558 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4559 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4560 } 4561 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4562 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4563 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4564 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4565 if (pcbddc->deluxe_zerorows) { 4566 const PetscScalar *array; 4567 const PetscInt *idxs_V,*idxs_all; 4568 PetscInt i,n_V; 4569 4570 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4571 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4572 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4573 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4574 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4575 for (i=0;i<n_V;i++) { 4576 PetscScalar val; 4577 PetscInt idx; 4578 4579 idx = idxs_V[i]; 4580 val = array[idxs_all[idxs_V[i]]]; 4581 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4582 } 4583 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4584 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4586 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4587 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4588 } 4589 sub_schurs->S_Ej_all = S_new; 4590 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4591 if (sub_schurs->sum_S_Ej_all) { 4592 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4593 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4594 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4595 if (pcbddc->deluxe_zerorows) { 4596 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4597 } 4598 sub_schurs->sum_S_Ej_all = S_new; 4599 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4600 } 4601 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4602 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4603 } 4604 /* destroy any change of basis context in sub_schurs */ 4605 if (sub_schurs && sub_schurs->change) { 4606 PetscInt i; 4607 4608 for (i=0;i<sub_schurs->n_subs;i++) { 4609 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4610 } 4611 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4612 } 4613 } 4614 if (pcbddc->switch_static) { /* need to save the local change */ 4615 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4616 } else { 4617 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4618 } 4619 /* determine if any process has changed the pressures locally */ 4620 pcbddc->change_interior = pcbddc->benign_have_null; 4621 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4622 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4623 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4624 pcbddc->use_qr_single = qr_needed; 4625 } 4626 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4627 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4628 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4629 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4630 } else { 4631 Mat benign_global = NULL; 4632 if (pcbddc->benign_have_null) { 4633 Mat tmat; 4634 4635 pcbddc->change_interior = PETSC_TRUE; 4636 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4637 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4640 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4641 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4642 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4643 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4644 if (pcbddc->benign_change) { 4645 Mat M; 4646 4647 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4648 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4649 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4650 ierr = MatDestroy(&M);CHKERRQ(ierr); 4651 } else { 4652 Mat eye; 4653 PetscScalar *array; 4654 4655 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4656 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4657 for (i=0;i<pcis->n;i++) { 4658 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4659 } 4660 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4661 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4662 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4663 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4664 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4665 } 4666 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4667 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4668 } 4669 if (pcbddc->user_ChangeOfBasisMatrix) { 4670 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4671 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4672 } else if (pcbddc->benign_have_null) { 4673 pcbddc->ChangeOfBasisMatrix = benign_global; 4674 } 4675 } 4676 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4677 IS is_global; 4678 const PetscInt *gidxs; 4679 4680 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4681 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4682 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4683 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4684 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4685 } 4686 } 4687 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4688 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4689 } 4690 4691 if (!pcbddc->fake_change) { 4692 /* add pressure dofs to set of primal nodes for numbering purposes */ 4693 for (i=0;i<pcbddc->benign_n;i++) { 4694 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4695 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4696 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4697 pcbddc->local_primal_size_cc++; 4698 pcbddc->local_primal_size++; 4699 } 4700 4701 /* check if a new primal space has been introduced (also take into account benign trick) */ 4702 pcbddc->new_primal_space_local = PETSC_TRUE; 4703 if (olocal_primal_size == pcbddc->local_primal_size) { 4704 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4705 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4706 if (!pcbddc->new_primal_space_local) { 4707 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4708 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4709 } 4710 } 4711 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4712 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4713 } 4714 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4715 4716 /* flush dbg viewer */ 4717 if (pcbddc->dbg_flag) { 4718 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4719 } 4720 4721 /* free workspace */ 4722 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4723 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4724 if (!pcbddc->adaptive_selection) { 4725 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4726 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4727 } else { 4728 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4729 pcbddc->adaptive_constraints_idxs_ptr, 4730 pcbddc->adaptive_constraints_data_ptr, 4731 pcbddc->adaptive_constraints_idxs, 4732 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4733 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4734 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4735 } 4736 PetscFunctionReturn(0); 4737 } 4738 4739 #undef __FUNCT__ 4740 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4741 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4742 { 4743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4744 PC_IS *pcis = (PC_IS*)pc->data; 4745 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4746 PetscInt ierr,i,N; 4747 4748 PetscFunctionBegin; 4749 /* Reset previously computed graph */ 4750 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4751 /* Init local Graph struct */ 4752 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4753 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4754 4755 /* Check validity of the csr graph passed in by the user */ 4756 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4757 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4758 } 4759 4760 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4761 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4762 PetscInt *xadj,*adjncy; 4763 PetscInt nvtxs; 4764 PetscBool flg_row=PETSC_FALSE; 4765 4766 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4767 if (flg_row) { 4768 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4769 pcbddc->computed_rowadj = PETSC_TRUE; 4770 } 4771 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4772 } 4773 if (pcbddc->dbg_flag) { 4774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4775 } 4776 4777 /* Setup of Graph */ 4778 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4779 4780 /* attach info on disconnected subdomains if present */ 4781 if (pcbddc->n_local_subs) { 4782 PetscInt *local_subs; 4783 4784 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4785 for (i=0;i<pcbddc->n_local_subs;i++) { 4786 const PetscInt *idxs; 4787 PetscInt nl,j; 4788 4789 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4790 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4791 for (j=0;j<nl;j++) { 4792 local_subs[idxs[j]] = i; 4793 } 4794 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4795 } 4796 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4797 pcbddc->mat_graph->local_subs = local_subs; 4798 } 4799 4800 /* Graph's connected components analysis */ 4801 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4802 PetscFunctionReturn(0); 4803 } 4804 4805 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4806 #undef __FUNCT__ 4807 #define __FUNCT__ "PCBDDCSubsetNumbering" 4808 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4809 { 4810 PetscSF sf; 4811 PetscLayout map; 4812 const PetscInt *idxs; 4813 PetscInt *leaf_data,*root_data,*gidxs; 4814 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4815 PetscInt n_n,nlocals,start,first_index; 4816 PetscMPIInt commsize; 4817 PetscBool first_found; 4818 PetscErrorCode ierr; 4819 4820 PetscFunctionBegin; 4821 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4822 if (subset_mult) { 4823 PetscCheckSameComm(subset,1,subset_mult,2); 4824 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4825 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4826 } 4827 /* create workspace layout for computing global indices of subset */ 4828 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4829 lbounds[0] = lbounds[1] = 0; 4830 for (i=0;i<n;i++) { 4831 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4832 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4833 } 4834 lbounds[0] = -lbounds[0]; 4835 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4836 gbounds[0] = -gbounds[0]; 4837 N = gbounds[1] - gbounds[0] + 1; 4838 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4839 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4840 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4841 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4842 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4843 4844 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4845 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4846 if (subset_mult) { 4847 const PetscInt* idxs_mult; 4848 4849 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4850 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4851 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4852 } else { 4853 for (i=0;i<n;i++) leaf_data[i] = 1; 4854 } 4855 /* local size of new subset */ 4856 n_n = 0; 4857 for (i=0;i<n;i++) n_n += leaf_data[i]; 4858 4859 /* global indexes in layout */ 4860 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4861 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4862 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4863 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4864 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4865 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4866 4867 /* reduce from leaves to roots */ 4868 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4869 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4870 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4871 4872 /* count indexes in local part of layout */ 4873 nlocals = 0; 4874 first_index = -1; 4875 first_found = PETSC_FALSE; 4876 for (i=0;i<Nl;i++) { 4877 if (!first_found && root_data[i]) { 4878 first_found = PETSC_TRUE; 4879 first_index = i; 4880 } 4881 nlocals += root_data[i]; 4882 } 4883 4884 /* cumulative of number of indexes and size of subset without holes */ 4885 #if defined(PETSC_HAVE_MPI_EXSCAN) 4886 start = 0; 4887 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4888 #else 4889 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4890 start = start-nlocals; 4891 #endif 4892 4893 if (N_n) { /* compute total size of new subset if requested */ 4894 *N_n = start + nlocals; 4895 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4896 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4897 } 4898 4899 /* adapt root data with cumulative */ 4900 if (first_found) { 4901 PetscInt old_index; 4902 4903 root_data[first_index] += start; 4904 old_index = first_index; 4905 for (i=first_index+1;i<Nl;i++) { 4906 if (root_data[i]) { 4907 root_data[i] += root_data[old_index]; 4908 old_index = i; 4909 } 4910 } 4911 } 4912 4913 /* from roots to leaves */ 4914 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4915 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4916 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4917 4918 /* create new IS with global indexes without holes */ 4919 if (subset_mult) { 4920 const PetscInt* idxs_mult; 4921 PetscInt cum; 4922 4923 cum = 0; 4924 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4925 for (i=0;i<n;i++) { 4926 PetscInt j; 4927 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4928 } 4929 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4930 } else { 4931 for (i=0;i<n;i++) { 4932 gidxs[i] = leaf_data[i]-1; 4933 } 4934 } 4935 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4936 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4937 PetscFunctionReturn(0); 4938 } 4939 4940 /* this implements stabilized Gram-Schmidt */ 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4943 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4944 { 4945 PetscInt i,j; 4946 PetscScalar *alphas; 4947 PetscErrorCode ierr; 4948 4949 PetscFunctionBegin; 4950 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4951 for (i=0;i<n;i++) { 4952 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4953 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4954 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4955 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4956 } 4957 ierr = PetscFree(alphas);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatISGetSubassemblingPattern" 4963 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4964 { 4965 Mat A; 4966 PetscInt n_neighs,*neighs,*n_shared,**shared; 4967 PetscMPIInt size,rank,color; 4968 PetscInt *xadj,*adjncy; 4969 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4970 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4971 PetscInt void_procs,*procs_candidates = NULL; 4972 PetscInt xadj_count, *count; 4973 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4974 PetscSubcomm psubcomm; 4975 MPI_Comm subcomm; 4976 PetscErrorCode ierr; 4977 4978 PetscFunctionBegin; 4979 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4980 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4981 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4982 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4983 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4984 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4985 4986 if (have_void) *have_void = PETSC_FALSE; 4987 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4988 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4989 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4990 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4991 im_active = !!(n); 4992 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4993 void_procs = size - active_procs; 4994 /* get ranks of of non-active processes in mat communicator */ 4995 if (void_procs) { 4996 PetscInt ncand; 4997 4998 if (have_void) *have_void = PETSC_TRUE; 4999 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5000 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5001 for (i=0,ncand=0;i<size;i++) { 5002 if (!procs_candidates[i]) { 5003 procs_candidates[ncand++] = i; 5004 } 5005 } 5006 /* force n_subdomains to be not greater that the number of non-active processes */ 5007 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5008 } 5009 5010 /* number of subdomains requested greater than active processes -> just shift the matrix */ 5011 if (active_procs < *n_subdomains) { 5012 PetscInt issize,isidx; 5013 if (im_active) { 5014 issize = 1; 5015 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5016 isidx = procs_candidates[rank]; 5017 } else { 5018 isidx = rank; 5019 } 5020 } else { 5021 issize = 0; 5022 isidx = -1; 5023 } 5024 *n_subdomains = active_procs; 5025 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5026 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5027 PetscFunctionReturn(0); 5028 } 5029 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5030 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5031 threshold = PetscMax(threshold,2); 5032 5033 /* Get info on mapping */ 5034 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5035 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5036 5037 /* build local CSR graph of subdomains' connectivity */ 5038 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5039 xadj[0] = 0; 5040 xadj[1] = PetscMax(n_neighs-1,0); 5041 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5042 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5043 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5044 for (i=1;i<n_neighs;i++) 5045 for (j=0;j<n_shared[i];j++) 5046 count[shared[i][j]] += 1; 5047 5048 xadj_count = 0; 5049 for (i=1;i<n_neighs;i++) { 5050 for (j=0;j<n_shared[i];j++) { 5051 if (count[shared[i][j]] < threshold) { 5052 adjncy[xadj_count] = neighs[i]; 5053 adjncy_wgt[xadj_count] = n_shared[i]; 5054 xadj_count++; 5055 break; 5056 } 5057 } 5058 } 5059 xadj[1] = xadj_count; 5060 ierr = PetscFree(count);CHKERRQ(ierr); 5061 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5062 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5063 5064 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5065 5066 /* Restrict work on active processes only */ 5067 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5068 if (void_procs) { 5069 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5070 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5071 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5072 subcomm = PetscSubcommChild(psubcomm); 5073 } else { 5074 psubcomm = NULL; 5075 subcomm = PetscObjectComm((PetscObject)mat); 5076 } 5077 5078 v_wgt = NULL; 5079 if (!color) { 5080 ierr = PetscFree(xadj);CHKERRQ(ierr); 5081 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5082 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5083 } else { 5084 Mat subdomain_adj; 5085 IS new_ranks,new_ranks_contig; 5086 MatPartitioning partitioner; 5087 PetscInt rstart=0,rend=0; 5088 PetscInt *is_indices,*oldranks; 5089 PetscMPIInt size; 5090 PetscBool aggregate; 5091 5092 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5093 if (void_procs) { 5094 PetscInt prank = rank; 5095 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5096 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5097 for (i=0;i<xadj[1];i++) { 5098 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5099 } 5100 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5101 } else { 5102 oldranks = NULL; 5103 } 5104 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5105 if (aggregate) { /* TODO: all this part could be made more efficient */ 5106 PetscInt lrows,row,ncols,*cols; 5107 PetscMPIInt nrank; 5108 PetscScalar *vals; 5109 5110 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5111 lrows = 0; 5112 if (nrank<redprocs) { 5113 lrows = size/redprocs; 5114 if (nrank<size%redprocs) lrows++; 5115 } 5116 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5117 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5118 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5119 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5120 row = nrank; 5121 ncols = xadj[1]-xadj[0]; 5122 cols = adjncy; 5123 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5124 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5125 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5126 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5127 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5128 ierr = PetscFree(xadj);CHKERRQ(ierr); 5129 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5130 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5131 ierr = PetscFree(vals);CHKERRQ(ierr); 5132 if (use_vwgt) { 5133 Vec v; 5134 const PetscScalar *array; 5135 PetscInt nl; 5136 5137 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5138 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5139 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5140 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5141 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5142 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5143 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5144 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5145 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5146 ierr = VecDestroy(&v);CHKERRQ(ierr); 5147 } 5148 } else { 5149 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5150 if (use_vwgt) { 5151 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5152 v_wgt[0] = local_size; 5153 } 5154 } 5155 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5156 5157 /* Partition */ 5158 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5159 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5160 if (v_wgt) { 5161 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5162 } 5163 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5164 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5165 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5166 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5167 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5168 5169 /* renumber new_ranks to avoid "holes" in new set of processors */ 5170 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5171 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5172 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5173 if (!aggregate) { 5174 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5175 #if defined(PETSC_USE_DEBUG) 5176 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5177 #endif 5178 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5179 } else if (oldranks) { 5180 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5181 } else { 5182 ranks_send_to_idx[0] = is_indices[0]; 5183 } 5184 } else { 5185 PetscInt idxs[1]; 5186 PetscMPIInt tag; 5187 MPI_Request *reqs; 5188 5189 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5190 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5191 for (i=rstart;i<rend;i++) { 5192 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5193 } 5194 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5195 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5196 ierr = PetscFree(reqs);CHKERRQ(ierr); 5197 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5198 #if defined(PETSC_USE_DEBUG) 5199 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5200 #endif 5201 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5202 } else if (oldranks) { 5203 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5204 } else { 5205 ranks_send_to_idx[0] = idxs[0]; 5206 } 5207 } 5208 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5209 /* clean up */ 5210 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5211 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5212 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5213 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5214 } 5215 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5216 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5217 5218 /* assemble parallel IS for sends */ 5219 i = 1; 5220 if (!color) i=0; 5221 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5222 PetscFunctionReturn(0); 5223 } 5224 5225 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5226 5227 #undef __FUNCT__ 5228 #define __FUNCT__ "MatISSubassemble" 5229 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5230 { 5231 Mat local_mat; 5232 IS is_sends_internal; 5233 PetscInt rows,cols,new_local_rows; 5234 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5235 PetscBool ismatis,isdense,newisdense,destroy_mat; 5236 ISLocalToGlobalMapping l2gmap; 5237 PetscInt* l2gmap_indices; 5238 const PetscInt* is_indices; 5239 MatType new_local_type; 5240 /* buffers */ 5241 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5242 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5243 PetscInt *recv_buffer_idxs_local; 5244 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5245 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5246 /* MPI */ 5247 MPI_Comm comm,comm_n; 5248 PetscSubcomm subcomm; 5249 PetscMPIInt n_sends,n_recvs,commsize; 5250 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5251 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5252 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5253 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5254 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5255 PetscErrorCode ierr; 5256 5257 PetscFunctionBegin; 5258 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5259 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5260 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5261 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5262 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5263 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5264 PetscValidLogicalCollectiveBool(mat,reuse,6); 5265 PetscValidLogicalCollectiveInt(mat,nis,8); 5266 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5267 if (nvecs) { 5268 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5269 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5270 } 5271 /* further checks */ 5272 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5273 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5274 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5275 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5276 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5277 if (reuse && *mat_n) { 5278 PetscInt mrows,mcols,mnrows,mncols; 5279 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5280 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5281 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5282 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5283 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5284 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5285 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5286 } 5287 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5288 PetscValidLogicalCollectiveInt(mat,bs,0); 5289 5290 /* prepare IS for sending if not provided */ 5291 if (!is_sends) { 5292 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5293 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5294 } else { 5295 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5296 is_sends_internal = is_sends; 5297 } 5298 5299 /* get comm */ 5300 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5301 5302 /* compute number of sends */ 5303 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5304 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5305 5306 /* compute number of receives */ 5307 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5308 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5309 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5310 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5311 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5312 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5313 ierr = PetscFree(iflags);CHKERRQ(ierr); 5314 5315 /* restrict comm if requested */ 5316 subcomm = 0; 5317 destroy_mat = PETSC_FALSE; 5318 if (restrict_comm) { 5319 PetscMPIInt color,subcommsize; 5320 5321 color = 0; 5322 if (restrict_full) { 5323 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5324 } else { 5325 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5326 } 5327 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5328 subcommsize = commsize - subcommsize; 5329 /* check if reuse has been requested */ 5330 if (reuse) { 5331 if (*mat_n) { 5332 PetscMPIInt subcommsize2; 5333 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5334 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5335 comm_n = PetscObjectComm((PetscObject)*mat_n); 5336 } else { 5337 comm_n = PETSC_COMM_SELF; 5338 } 5339 } else { /* MAT_INITIAL_MATRIX */ 5340 PetscMPIInt rank; 5341 5342 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5343 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5344 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5345 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5346 comm_n = PetscSubcommChild(subcomm); 5347 } 5348 /* flag to destroy *mat_n if not significative */ 5349 if (color) destroy_mat = PETSC_TRUE; 5350 } else { 5351 comm_n = comm; 5352 } 5353 5354 /* prepare send/receive buffers */ 5355 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5356 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5357 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5358 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5359 if (nis) { 5360 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5361 } 5362 5363 /* Get data from local matrices */ 5364 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5365 /* TODO: See below some guidelines on how to prepare the local buffers */ 5366 /* 5367 send_buffer_vals should contain the raw values of the local matrix 5368 send_buffer_idxs should contain: 5369 - MatType_PRIVATE type 5370 - PetscInt size_of_l2gmap 5371 - PetscInt global_row_indices[size_of_l2gmap] 5372 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5373 */ 5374 else { 5375 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5376 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5377 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5378 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5379 send_buffer_idxs[1] = i; 5380 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5381 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5382 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5383 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5384 for (i=0;i<n_sends;i++) { 5385 ilengths_vals[is_indices[i]] = len*len; 5386 ilengths_idxs[is_indices[i]] = len+2; 5387 } 5388 } 5389 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5390 /* additional is (if any) */ 5391 if (nis) { 5392 PetscMPIInt psum; 5393 PetscInt j; 5394 for (j=0,psum=0;j<nis;j++) { 5395 PetscInt plen; 5396 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5397 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5398 psum += len+1; /* indices + lenght */ 5399 } 5400 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5401 for (j=0,psum=0;j<nis;j++) { 5402 PetscInt plen; 5403 const PetscInt *is_array_idxs; 5404 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5405 send_buffer_idxs_is[psum] = plen; 5406 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5407 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5408 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5409 psum += plen+1; /* indices + lenght */ 5410 } 5411 for (i=0;i<n_sends;i++) { 5412 ilengths_idxs_is[is_indices[i]] = psum; 5413 } 5414 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5415 } 5416 5417 buf_size_idxs = 0; 5418 buf_size_vals = 0; 5419 buf_size_idxs_is = 0; 5420 buf_size_vecs = 0; 5421 for (i=0;i<n_recvs;i++) { 5422 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5423 buf_size_vals += (PetscInt)olengths_vals[i]; 5424 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5425 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5426 } 5427 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5428 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5429 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5430 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5431 5432 /* get new tags for clean communications */ 5433 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5434 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5435 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5436 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5437 5438 /* allocate for requests */ 5439 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5440 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5441 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5442 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5443 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5444 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5445 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5446 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5447 5448 /* communications */ 5449 ptr_idxs = recv_buffer_idxs; 5450 ptr_vals = recv_buffer_vals; 5451 ptr_idxs_is = recv_buffer_idxs_is; 5452 ptr_vecs = recv_buffer_vecs; 5453 for (i=0;i<n_recvs;i++) { 5454 source_dest = onodes[i]; 5455 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5456 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5457 ptr_idxs += olengths_idxs[i]; 5458 ptr_vals += olengths_vals[i]; 5459 if (nis) { 5460 source_dest = onodes_is[i]; 5461 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5462 ptr_idxs_is += olengths_idxs_is[i]; 5463 } 5464 if (nvecs) { 5465 source_dest = onodes[i]; 5466 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5467 ptr_vecs += olengths_idxs[i]-2; 5468 } 5469 } 5470 for (i=0;i<n_sends;i++) { 5471 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5472 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5473 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5474 if (nis) { 5475 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5476 } 5477 if (nvecs) { 5478 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5479 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5480 } 5481 } 5482 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5483 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5484 5485 /* assemble new l2g map */ 5486 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5487 ptr_idxs = recv_buffer_idxs; 5488 new_local_rows = 0; 5489 for (i=0;i<n_recvs;i++) { 5490 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5491 ptr_idxs += olengths_idxs[i]; 5492 } 5493 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5494 ptr_idxs = recv_buffer_idxs; 5495 new_local_rows = 0; 5496 for (i=0;i<n_recvs;i++) { 5497 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5498 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5499 ptr_idxs += olengths_idxs[i]; 5500 } 5501 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5502 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5503 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5504 5505 /* infer new local matrix type from received local matrices type */ 5506 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5507 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5508 if (n_recvs) { 5509 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5510 ptr_idxs = recv_buffer_idxs; 5511 for (i=0;i<n_recvs;i++) { 5512 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5513 new_local_type_private = MATAIJ_PRIVATE; 5514 break; 5515 } 5516 ptr_idxs += olengths_idxs[i]; 5517 } 5518 switch (new_local_type_private) { 5519 case MATDENSE_PRIVATE: 5520 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5521 new_local_type = MATSEQAIJ; 5522 bs = 1; 5523 } else { /* if I receive only 1 dense matrix */ 5524 new_local_type = MATSEQDENSE; 5525 bs = 1; 5526 } 5527 break; 5528 case MATAIJ_PRIVATE: 5529 new_local_type = MATSEQAIJ; 5530 bs = 1; 5531 break; 5532 case MATBAIJ_PRIVATE: 5533 new_local_type = MATSEQBAIJ; 5534 break; 5535 case MATSBAIJ_PRIVATE: 5536 new_local_type = MATSEQSBAIJ; 5537 break; 5538 default: 5539 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5540 break; 5541 } 5542 } else { /* by default, new_local_type is seqdense */ 5543 new_local_type = MATSEQDENSE; 5544 bs = 1; 5545 } 5546 5547 /* create MATIS object if needed */ 5548 if (!reuse) { 5549 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5550 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5551 } else { 5552 /* it also destroys the local matrices */ 5553 if (*mat_n) { 5554 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5555 } else { /* this is a fake object */ 5556 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5557 } 5558 } 5559 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5560 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5561 5562 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5563 5564 /* Global to local map of received indices */ 5565 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5566 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5567 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5568 5569 /* restore attributes -> type of incoming data and its size */ 5570 buf_size_idxs = 0; 5571 for (i=0;i<n_recvs;i++) { 5572 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5573 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5574 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5575 } 5576 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5577 5578 /* set preallocation */ 5579 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5580 if (!newisdense) { 5581 PetscInt *new_local_nnz=0; 5582 5583 ptr_idxs = recv_buffer_idxs_local; 5584 if (n_recvs) { 5585 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5586 } 5587 for (i=0;i<n_recvs;i++) { 5588 PetscInt j; 5589 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5590 for (j=0;j<*(ptr_idxs+1);j++) { 5591 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5592 } 5593 } else { 5594 /* TODO */ 5595 } 5596 ptr_idxs += olengths_idxs[i]; 5597 } 5598 if (new_local_nnz) { 5599 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5600 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5601 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5602 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5603 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5604 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5605 } else { 5606 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5607 } 5608 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5609 } else { 5610 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5611 } 5612 5613 /* set values */ 5614 ptr_vals = recv_buffer_vals; 5615 ptr_idxs = recv_buffer_idxs_local; 5616 for (i=0;i<n_recvs;i++) { 5617 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5618 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5619 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5620 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5621 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5622 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5623 } else { 5624 /* TODO */ 5625 } 5626 ptr_idxs += olengths_idxs[i]; 5627 ptr_vals += olengths_vals[i]; 5628 } 5629 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5634 5635 #if 0 5636 if (!restrict_comm) { /* check */ 5637 Vec lvec,rvec; 5638 PetscReal infty_error; 5639 5640 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5641 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5642 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5643 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5644 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5645 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5646 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5647 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5648 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5649 } 5650 #endif 5651 5652 /* assemble new additional is (if any) */ 5653 if (nis) { 5654 PetscInt **temp_idxs,*count_is,j,psum; 5655 5656 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5657 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5658 ptr_idxs = recv_buffer_idxs_is; 5659 psum = 0; 5660 for (i=0;i<n_recvs;i++) { 5661 for (j=0;j<nis;j++) { 5662 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5663 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5664 psum += plen; 5665 ptr_idxs += plen+1; /* shift pointer to received data */ 5666 } 5667 } 5668 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5669 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5670 for (i=1;i<nis;i++) { 5671 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5672 } 5673 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5674 ptr_idxs = recv_buffer_idxs_is; 5675 for (i=0;i<n_recvs;i++) { 5676 for (j=0;j<nis;j++) { 5677 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5678 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5679 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5680 ptr_idxs += plen+1; /* shift pointer to received data */ 5681 } 5682 } 5683 for (i=0;i<nis;i++) { 5684 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5685 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5686 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5687 } 5688 ierr = PetscFree(count_is);CHKERRQ(ierr); 5689 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5690 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5691 } 5692 /* free workspace */ 5693 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5694 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5695 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5696 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5697 if (isdense) { 5698 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5699 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5700 } else { 5701 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5702 } 5703 if (nis) { 5704 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5705 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5706 } 5707 5708 if (nvecs) { 5709 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5710 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5711 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5712 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5713 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5714 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5715 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5716 /* set values */ 5717 ptr_vals = recv_buffer_vecs; 5718 ptr_idxs = recv_buffer_idxs_local; 5719 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5720 for (i=0;i<n_recvs;i++) { 5721 PetscInt j; 5722 for (j=0;j<*(ptr_idxs+1);j++) { 5723 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5724 } 5725 ptr_idxs += olengths_idxs[i]; 5726 ptr_vals += olengths_idxs[i]-2; 5727 } 5728 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5729 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5730 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5731 } 5732 5733 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5734 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5735 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5736 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5737 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5738 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5739 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5740 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5741 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5742 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5743 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5744 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5745 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5746 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5747 ierr = PetscFree(onodes);CHKERRQ(ierr); 5748 if (nis) { 5749 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5750 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5751 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5752 } 5753 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5754 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5755 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5756 for (i=0;i<nis;i++) { 5757 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5758 } 5759 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5760 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5761 } 5762 *mat_n = NULL; 5763 } 5764 PetscFunctionReturn(0); 5765 } 5766 5767 /* temporary hack into ksp private data structure */ 5768 #include <petsc/private/kspimpl.h> 5769 5770 #undef __FUNCT__ 5771 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5772 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5773 { 5774 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5775 PC_IS *pcis = (PC_IS*)pc->data; 5776 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5777 Mat coarsedivudotp = NULL; 5778 MatNullSpace CoarseNullSpace = NULL; 5779 ISLocalToGlobalMapping coarse_islg; 5780 IS coarse_is,*isarray; 5781 PetscInt i,im_active=-1,active_procs=-1; 5782 PetscInt nis,nisdofs,nisneu,nisvert; 5783 PC pc_temp; 5784 PCType coarse_pc_type; 5785 KSPType coarse_ksp_type; 5786 PetscBool multilevel_requested,multilevel_allowed; 5787 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5788 Mat t_coarse_mat_is; 5789 PetscInt ncoarse; 5790 PetscBool compute_vecs = PETSC_FALSE; 5791 PetscScalar *array; 5792 MatReuse coarse_mat_reuse; 5793 PetscBool restr, full_restr, have_void; 5794 PetscErrorCode ierr; 5795 5796 PetscFunctionBegin; 5797 /* Assign global numbering to coarse dofs */ 5798 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5799 PetscInt ocoarse_size; 5800 compute_vecs = PETSC_TRUE; 5801 ocoarse_size = pcbddc->coarse_size; 5802 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5803 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5804 /* see if we can avoid some work */ 5805 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5806 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5807 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5808 PC pc; 5809 PetscBool isbddc; 5810 5811 /* temporary workaround since PCBDDC does not have a reset method so far */ 5812 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5813 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5814 if (isbddc) { 5815 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5816 } else { 5817 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5818 } 5819 coarse_reuse = PETSC_FALSE; 5820 } else { /* we can safely reuse already computed coarse matrix */ 5821 coarse_reuse = PETSC_TRUE; 5822 } 5823 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5824 coarse_reuse = PETSC_FALSE; 5825 } 5826 /* reset any subassembling information */ 5827 if (!coarse_reuse || pcbddc->recompute_topography) { 5828 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5829 } 5830 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5831 coarse_reuse = PETSC_TRUE; 5832 } 5833 /* assemble coarse matrix */ 5834 if (coarse_reuse && pcbddc->coarse_ksp) { 5835 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5836 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5837 coarse_mat_reuse = MAT_REUSE_MATRIX; 5838 } else { 5839 coarse_mat = NULL; 5840 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5841 } 5842 5843 /* creates temporary l2gmap and IS for coarse indexes */ 5844 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5845 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5846 5847 /* creates temporary MATIS object for coarse matrix */ 5848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5849 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5850 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5851 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5852 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5853 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5854 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5855 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5856 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5857 5858 /* count "active" (i.e. with positive local size) and "void" processes */ 5859 im_active = !!(pcis->n); 5860 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5861 5862 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5863 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5864 /* full_restr : just use the receivers from the subassembling pattern */ 5865 coarse_mat_is = NULL; 5866 multilevel_allowed = PETSC_FALSE; 5867 multilevel_requested = PETSC_FALSE; 5868 full_restr = PETSC_TRUE; 5869 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5870 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5871 if (multilevel_requested) { 5872 ncoarse = active_procs/pcbddc->coarsening_ratio; 5873 restr = PETSC_FALSE; 5874 full_restr = PETSC_FALSE; 5875 } else { 5876 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5877 restr = PETSC_TRUE; 5878 full_restr = PETSC_TRUE; 5879 } 5880 ncoarse = PetscMax(1,ncoarse); 5881 if (!pcbddc->coarse_subassembling) { 5882 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5883 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5884 PetscInt psum; 5885 PetscMPIInt size; 5886 if (pcbddc->coarse_ksp) psum = 1; 5887 else psum = 0; 5888 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5889 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5890 if (ncoarse < size) have_void = PETSC_TRUE; 5891 } 5892 /* determine if we can go multilevel */ 5893 if (multilevel_requested) { 5894 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5895 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5896 } 5897 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5898 5899 /* dump subassembling pattern */ 5900 if (pcbddc->dbg_flag && multilevel_allowed) { 5901 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5902 } 5903 5904 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5905 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5906 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5907 const PetscInt *idxs; 5908 ISLocalToGlobalMapping tmap; 5909 5910 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5911 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5912 /* allocate space for temporary storage */ 5913 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5914 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5915 /* allocate for IS array */ 5916 nisdofs = pcbddc->n_ISForDofsLocal; 5917 nisneu = !!pcbddc->NeumannBoundariesLocal; 5918 nisvert = 0; /* nisvert is not used */ 5919 nis = nisdofs + nisneu + nisvert; 5920 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5921 /* dofs splitting */ 5922 for (i=0;i<nisdofs;i++) { 5923 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5924 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5925 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5926 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5927 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5928 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5929 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5930 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5931 } 5932 /* neumann boundaries */ 5933 if (pcbddc->NeumannBoundariesLocal) { 5934 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5935 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5936 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5937 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5938 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5939 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5940 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5941 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5942 } 5943 /* free memory */ 5944 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5945 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5946 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5947 } else { 5948 nis = 0; 5949 nisdofs = 0; 5950 nisneu = 0; 5951 nisvert = 0; 5952 isarray = NULL; 5953 } 5954 /* destroy no longer needed map */ 5955 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5956 5957 /* subassemble */ 5958 if (multilevel_allowed) { 5959 Vec vp[1]; 5960 PetscInt nvecs = 0; 5961 PetscBool reuse,reuser; 5962 5963 if (coarse_mat) reuse = PETSC_TRUE; 5964 else reuse = PETSC_FALSE; 5965 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5966 vp[0] = NULL; 5967 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5968 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5969 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5970 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5971 nvecs = 1; 5972 5973 if (pcbddc->divudotp) { 5974 Mat B; 5975 Vec v,p; 5976 IS dummy; 5977 PetscInt np; 5978 5979 ierr = MatGetSize(pcbddc->divudotp,&np,NULL);CHKERRQ(ierr); 5980 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5981 ierr = MatGetSubMatrix(pcbddc->divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5982 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5983 ierr = VecSet(p,1.);CHKERRQ(ierr); 5984 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5985 ierr = VecDestroy(&p);CHKERRQ(ierr); 5986 ierr = MatDestroy(&B);CHKERRQ(ierr); 5987 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5988 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5989 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5990 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5991 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5992 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5993 ierr = VecDestroy(&v);CHKERRQ(ierr); 5994 } 5995 } 5996 if (reuser) { 5997 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5998 } else { 5999 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6000 } 6001 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6002 PetscScalar *arraym,*arrayv; 6003 PetscInt nl; 6004 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6005 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6006 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6007 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6008 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6009 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6010 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6011 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6012 } 6013 } else { 6014 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6015 } 6016 if (coarse_mat_is || coarse_mat) { 6017 PetscMPIInt size; 6018 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6019 if (!multilevel_allowed) { 6020 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6021 } else { 6022 Mat A; 6023 6024 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6025 if (coarse_mat_is) { 6026 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6027 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6028 coarse_mat = coarse_mat_is; 6029 } 6030 /* be sure we don't have MatSeqDENSE as local mat */ 6031 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6032 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6033 } 6034 } 6035 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6036 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6037 6038 /* create local to global scatters for coarse problem */ 6039 if (compute_vecs) { 6040 PetscInt lrows; 6041 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6042 if (coarse_mat) { 6043 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6044 } else { 6045 lrows = 0; 6046 } 6047 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6048 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6049 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6050 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6051 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6052 } 6053 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6054 6055 /* set defaults for coarse KSP and PC */ 6056 if (multilevel_allowed) { 6057 coarse_ksp_type = KSPRICHARDSON; 6058 coarse_pc_type = PCBDDC; 6059 } else { 6060 coarse_ksp_type = KSPPREONLY; 6061 coarse_pc_type = PCREDUNDANT; 6062 } 6063 6064 /* print some info if requested */ 6065 if (pcbddc->dbg_flag) { 6066 if (!multilevel_allowed) { 6067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6068 if (multilevel_requested) { 6069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6070 } else if (pcbddc->max_levels) { 6071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6072 } 6073 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6074 } 6075 } 6076 6077 /* create the coarse KSP object only once with defaults */ 6078 if (coarse_mat) { 6079 PetscViewer dbg_viewer = NULL; 6080 if (pcbddc->dbg_flag) { 6081 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6082 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6083 } 6084 if (!pcbddc->coarse_ksp) { 6085 char prefix[256],str_level[16]; 6086 size_t len; 6087 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6088 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6089 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6090 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6091 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6092 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6093 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6094 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6095 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6096 /* prefix */ 6097 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6098 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6099 if (!pcbddc->current_level) { 6100 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6101 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6102 } else { 6103 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6104 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6105 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6106 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6107 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6108 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6109 } 6110 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6111 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6112 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6113 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6114 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6115 /* allow user customization */ 6116 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6117 } 6118 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6119 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6120 if (nisdofs) { 6121 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6122 for (i=0;i<nisdofs;i++) { 6123 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6124 } 6125 } 6126 if (nisneu) { 6127 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6128 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6129 } 6130 if (nisvert) { 6131 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6132 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6133 } 6134 6135 /* get some info after set from options */ 6136 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6137 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6138 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6139 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6140 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6141 isbddc = PETSC_FALSE; 6142 } 6143 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6144 if (isredundant) { 6145 KSP inner_ksp; 6146 PC inner_pc; 6147 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6148 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6149 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6150 } 6151 6152 /* parameters which miss an API */ 6153 if (isbddc) { 6154 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6155 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6156 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6157 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6158 if (pcbddc_coarse->benign_saddle_point) { 6159 if (coarsedivudotp) { 6160 ierr = MatDestroy(&pcbddc_coarse->divudotp);CHKERRQ(ierr); 6161 pcbddc_coarse->divudotp = coarsedivudotp; 6162 coarsedivudotp = NULL; 6163 } 6164 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 6165 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6166 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6167 } 6168 } 6169 6170 /* propagate symmetry info of coarse matrix */ 6171 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6172 if (pc->pmat->symmetric_set) { 6173 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6174 } 6175 if (pc->pmat->hermitian_set) { 6176 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6177 } 6178 if (pc->pmat->spd_set) { 6179 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6180 } 6181 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6182 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6183 } 6184 /* set operators */ 6185 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6186 if (pcbddc->dbg_flag) { 6187 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6188 } 6189 } 6190 ierr = PetscFree(isarray);CHKERRQ(ierr); 6191 #if 0 6192 { 6193 PetscViewer viewer; 6194 char filename[256]; 6195 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6196 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6197 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6198 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6199 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6200 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6201 } 6202 #endif 6203 6204 if (pcbddc->coarse_ksp) { 6205 Vec crhs,csol; 6206 6207 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6208 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6209 if (!csol) { 6210 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6211 } 6212 if (!crhs) { 6213 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6214 } 6215 } 6216 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6217 6218 /* compute null space for coarse solver if the benign trick has been requested */ 6219 if (pcbddc->benign_null) { 6220 6221 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6222 for (i=0;i<pcbddc->benign_n;i++) { 6223 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6224 } 6225 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6226 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6227 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6228 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6229 if (coarse_mat) { 6230 Vec nullv; 6231 PetscScalar *array,*array2; 6232 PetscInt nl; 6233 6234 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6235 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6236 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6237 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6238 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6239 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6240 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6241 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6242 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6243 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6244 } 6245 } 6246 6247 if (pcbddc->coarse_ksp) { 6248 PetscBool ispreonly; 6249 6250 if (CoarseNullSpace) { 6251 PetscBool isnull; 6252 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6253 if (isnull) { 6254 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6255 } 6256 /* TODO: add local nullspaces (if any) */ 6257 } 6258 /* setup coarse ksp */ 6259 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6260 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6261 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6262 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6263 KSP check_ksp; 6264 KSPType check_ksp_type; 6265 PC check_pc; 6266 Vec check_vec,coarse_vec; 6267 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6268 PetscInt its; 6269 PetscBool compute_eigs; 6270 PetscReal *eigs_r,*eigs_c; 6271 PetscInt neigs; 6272 const char *prefix; 6273 6274 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6275 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6276 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6277 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6278 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6279 /* prevent from setup unneeded object */ 6280 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6281 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6282 if (ispreonly) { 6283 check_ksp_type = KSPPREONLY; 6284 compute_eigs = PETSC_FALSE; 6285 } else { 6286 check_ksp_type = KSPGMRES; 6287 compute_eigs = PETSC_TRUE; 6288 } 6289 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6290 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6291 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6292 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6293 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6294 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6295 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6296 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6297 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6298 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6299 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6300 /* create random vec */ 6301 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6302 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6303 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6304 /* solve coarse problem */ 6305 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6306 /* set eigenvalue estimation if preonly has not been requested */ 6307 if (compute_eigs) { 6308 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6309 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6310 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6311 if (neigs) { 6312 lambda_max = eigs_r[neigs-1]; 6313 lambda_min = eigs_r[0]; 6314 if (pcbddc->use_coarse_estimates) { 6315 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6316 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6317 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6318 } 6319 } 6320 } 6321 } 6322 6323 /* check coarse problem residual error */ 6324 if (pcbddc->dbg_flag) { 6325 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6326 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6327 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6328 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6329 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6330 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6331 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6332 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6333 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6334 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6335 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6336 if (CoarseNullSpace) { 6337 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6338 } 6339 if (compute_eigs) { 6340 PetscReal lambda_max_s,lambda_min_s; 6341 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6342 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6343 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6344 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6345 for (i=0;i<neigs;i++) { 6346 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6347 } 6348 } 6349 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6350 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6351 } 6352 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6353 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6354 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6355 if (compute_eigs) { 6356 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6357 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6358 } 6359 } 6360 } 6361 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6362 /* print additional info */ 6363 if (pcbddc->dbg_flag) { 6364 /* waits until all processes reaches this point */ 6365 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6366 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6368 } 6369 6370 /* free memory */ 6371 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6372 PetscFunctionReturn(0); 6373 } 6374 6375 #undef __FUNCT__ 6376 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6377 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6378 { 6379 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6380 PC_IS* pcis = (PC_IS*)pc->data; 6381 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6382 IS subset,subset_mult,subset_n; 6383 PetscInt local_size,coarse_size=0; 6384 PetscInt *local_primal_indices=NULL; 6385 const PetscInt *t_local_primal_indices; 6386 PetscErrorCode ierr; 6387 6388 PetscFunctionBegin; 6389 /* Compute global number of coarse dofs */ 6390 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6391 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6392 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6393 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6394 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6395 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6396 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6397 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6398 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6399 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6400 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6401 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6402 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6403 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6404 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6405 6406 /* check numbering */ 6407 if (pcbddc->dbg_flag) { 6408 PetscScalar coarsesum,*array,*array2; 6409 PetscInt i; 6410 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6411 6412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6415 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6416 /* counter */ 6417 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6418 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6419 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6420 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6421 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6422 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6423 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6424 for (i=0;i<pcbddc->local_primal_size;i++) { 6425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6426 } 6427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6429 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6430 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6431 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6432 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6433 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6434 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6435 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6436 for (i=0;i<pcis->n;i++) { 6437 if (array[i] != 0.0 && array[i] != array2[i]) { 6438 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6439 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6440 set_error = PETSC_TRUE; 6441 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6443 } 6444 } 6445 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6446 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6447 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6448 for (i=0;i<pcis->n;i++) { 6449 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6450 } 6451 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6452 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6453 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6454 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6455 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6457 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6458 PetscInt *gidxs; 6459 6460 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6461 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6465 for (i=0;i<pcbddc->local_primal_size;i++) { 6466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6467 } 6468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6469 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6470 } 6471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6472 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6473 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6474 } 6475 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6476 /* get back data */ 6477 *coarse_size_n = coarse_size; 6478 *local_primal_indices_n = local_primal_indices; 6479 PetscFunctionReturn(0); 6480 } 6481 6482 #undef __FUNCT__ 6483 #define __FUNCT__ "PCBDDCGlobalToLocal" 6484 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6485 { 6486 IS localis_t; 6487 PetscInt i,lsize,*idxs,n; 6488 PetscScalar *vals; 6489 PetscErrorCode ierr; 6490 6491 PetscFunctionBegin; 6492 /* get indices in local ordering exploiting local to global map */ 6493 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6494 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6495 for (i=0;i<lsize;i++) vals[i] = 1.0; 6496 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6497 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6498 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6499 if (idxs) { /* multilevel guard */ 6500 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6501 } 6502 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6503 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6504 ierr = PetscFree(vals);CHKERRQ(ierr); 6505 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6506 /* now compute set in local ordering */ 6507 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6508 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6509 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6510 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6511 for (i=0,lsize=0;i<n;i++) { 6512 if (PetscRealPart(vals[i]) > 0.5) { 6513 lsize++; 6514 } 6515 } 6516 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6517 for (i=0,lsize=0;i<n;i++) { 6518 if (PetscRealPart(vals[i]) > 0.5) { 6519 idxs[lsize++] = i; 6520 } 6521 } 6522 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6524 *localis = localis_t; 6525 PetscFunctionReturn(0); 6526 } 6527 6528 #undef __FUNCT__ 6529 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6530 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6531 { 6532 PC_IS *pcis=(PC_IS*)pc->data; 6533 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6534 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6535 Mat S_j; 6536 PetscInt *used_xadj,*used_adjncy; 6537 PetscBool free_used_adj; 6538 PetscErrorCode ierr; 6539 6540 PetscFunctionBegin; 6541 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6542 free_used_adj = PETSC_FALSE; 6543 if (pcbddc->sub_schurs_layers == -1) { 6544 used_xadj = NULL; 6545 used_adjncy = NULL; 6546 } else { 6547 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6548 used_xadj = pcbddc->mat_graph->xadj; 6549 used_adjncy = pcbddc->mat_graph->adjncy; 6550 } else if (pcbddc->computed_rowadj) { 6551 used_xadj = pcbddc->mat_graph->xadj; 6552 used_adjncy = pcbddc->mat_graph->adjncy; 6553 } else { 6554 PetscBool flg_row=PETSC_FALSE; 6555 const PetscInt *xadj,*adjncy; 6556 PetscInt nvtxs; 6557 6558 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6559 if (flg_row) { 6560 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6561 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6562 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6563 free_used_adj = PETSC_TRUE; 6564 } else { 6565 pcbddc->sub_schurs_layers = -1; 6566 used_xadj = NULL; 6567 used_adjncy = NULL; 6568 } 6569 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6570 } 6571 } 6572 6573 /* setup sub_schurs data */ 6574 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6575 if (!sub_schurs->schur_explicit) { 6576 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6577 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6578 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6579 } else { 6580 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6581 PetscBool isseqaij,need_change = PETSC_FALSE;; 6582 PetscInt benign_n; 6583 Mat change = NULL; 6584 Vec scaling = NULL; 6585 IS change_primal = NULL; 6586 6587 if (!pcbddc->use_vertices && reuse_solvers) { 6588 PetscInt n_vertices; 6589 6590 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6591 reuse_solvers = (PetscBool)!n_vertices; 6592 } 6593 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6594 if (!isseqaij) { 6595 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6596 if (matis->A == pcbddc->local_mat) { 6597 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6598 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6599 } else { 6600 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6601 } 6602 } 6603 if (!pcbddc->benign_change_explicit) { 6604 benign_n = pcbddc->benign_n; 6605 } else { 6606 benign_n = 0; 6607 } 6608 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6609 We need a global reduction to avoid possible deadlocks. 6610 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6611 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6612 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6613 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6614 need_change = (PetscBool)(!need_change); 6615 } 6616 /* If the user defines additional constraints, we import them here. 6617 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6618 if (need_change) { 6619 PC_IS *pcisf; 6620 PC_BDDC *pcbddcf; 6621 PC pcf; 6622 6623 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6624 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6625 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6626 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6627 /* hacks */ 6628 pcisf = (PC_IS*)pcf->data; 6629 pcisf->is_B_local = pcis->is_B_local; 6630 pcisf->vec1_N = pcis->vec1_N; 6631 pcisf->BtoNmap = pcis->BtoNmap; 6632 pcisf->n = pcis->n; 6633 pcisf->n_B = pcis->n_B; 6634 pcbddcf = (PC_BDDC*)pcf->data; 6635 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6636 pcbddcf->mat_graph = pcbddc->mat_graph; 6637 pcbddcf->use_faces = PETSC_TRUE; 6638 pcbddcf->use_change_of_basis = PETSC_TRUE; 6639 pcbddcf->use_change_on_faces = PETSC_TRUE; 6640 pcbddcf->use_qr_single = PETSC_TRUE; 6641 pcbddcf->fake_change = PETSC_TRUE; 6642 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6643 /* store information on primal vertices and change of basis (in local numbering) */ 6644 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6645 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6646 change = pcbddcf->ConstraintMatrix; 6647 pcbddcf->ConstraintMatrix = NULL; 6648 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6649 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6650 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6651 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6652 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6653 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6654 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6655 pcf->ops->destroy = NULL; 6656 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6657 } 6658 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6659 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6660 ierr = MatDestroy(&change);CHKERRQ(ierr); 6661 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6662 } 6663 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6664 6665 /* free adjacency */ 6666 if (free_used_adj) { 6667 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6668 } 6669 PetscFunctionReturn(0); 6670 } 6671 6672 #undef __FUNCT__ 6673 #define __FUNCT__ "PCBDDCInitSubSchurs" 6674 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6675 { 6676 PC_IS *pcis=(PC_IS*)pc->data; 6677 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6678 PCBDDCGraph graph; 6679 PetscErrorCode ierr; 6680 6681 PetscFunctionBegin; 6682 /* attach interface graph for determining subsets */ 6683 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6684 IS verticesIS,verticescomm; 6685 PetscInt vsize,*idxs; 6686 6687 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6688 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6689 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6690 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6691 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6692 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6693 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6694 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6695 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6696 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6697 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6698 } else { 6699 graph = pcbddc->mat_graph; 6700 } 6701 /* print some info */ 6702 if (pcbddc->dbg_flag) { 6703 IS vertices; 6704 PetscInt nv,nedges,nfaces; 6705 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6706 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6707 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6708 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6709 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6715 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6716 } 6717 6718 /* sub_schurs init */ 6719 if (!pcbddc->sub_schurs) { 6720 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6721 } 6722 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6723 6724 /* free graph struct */ 6725 if (pcbddc->sub_schurs_rebuild) { 6726 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6727 } 6728 PetscFunctionReturn(0); 6729 } 6730 6731 #undef __FUNCT__ 6732 #define __FUNCT__ "PCBDDCCheckOperator" 6733 PetscErrorCode PCBDDCCheckOperator(PC pc) 6734 { 6735 PC_IS *pcis=(PC_IS*)pc->data; 6736 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6737 PetscErrorCode ierr; 6738 6739 PetscFunctionBegin; 6740 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6741 IS zerodiag = NULL; 6742 Mat S_j,B0_B=NULL; 6743 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6744 PetscScalar *p0_check,*array,*array2; 6745 PetscReal norm; 6746 PetscInt i; 6747 6748 /* B0 and B0_B */ 6749 if (zerodiag) { 6750 IS dummy; 6751 6752 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6753 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6754 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6755 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6756 } 6757 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6758 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6759 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6760 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6761 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6762 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6763 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6764 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6765 /* S_j */ 6766 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6767 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6768 6769 /* mimic vector in \widetilde{W}_\Gamma */ 6770 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6771 /* continuous in primal space */ 6772 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6773 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6774 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6775 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6776 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6777 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6778 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6779 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6780 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6781 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6782 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6783 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6784 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6785 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6786 6787 /* assemble rhs for coarse problem */ 6788 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6789 /* local with Schur */ 6790 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6791 if (zerodiag) { 6792 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6793 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6794 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6795 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6796 } 6797 /* sum on primal nodes the local contributions */ 6798 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6799 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6800 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6801 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6802 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6803 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6804 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6805 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6806 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6807 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6808 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6809 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6810 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6811 /* scale primal nodes (BDDC sums contibutions) */ 6812 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6813 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6814 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6815 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6816 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6817 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6818 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6819 /* global: \widetilde{B0}_B w_\Gamma */ 6820 if (zerodiag) { 6821 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6822 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6823 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6824 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6825 } 6826 /* BDDC */ 6827 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6828 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6829 6830 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6831 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6832 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6833 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6834 for (i=0;i<pcbddc->benign_n;i++) { 6835 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6836 } 6837 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6838 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6839 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6840 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6841 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6842 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6843 } 6844 PetscFunctionReturn(0); 6845 } 6846