1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 36 if (i > 1) { 37 pcbddc->n_ISForDofsLocal = i; 38 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 39 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 40 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 41 } 42 } 43 } 44 } 45 46 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 51 } 52 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 53 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 54 } 55 ierr = VecDestroy(&global);CHKERRQ(ierr); 56 ierr = VecDestroy(&local);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 62 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 63 { 64 PC_IS *pcis = (PC_IS*)(pc->data); 65 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 if (!pcbddc->benign_have_null) { 70 PetscFunctionReturn(0); 71 } 72 if (pcbddc->ChangeOfBasisMatrix) { 73 Vec swap; 74 75 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 76 swap = pcbddc->work_change; 77 pcbddc->work_change = r; 78 r = swap; 79 } 80 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82 if (pcbddc->benign_n) { 83 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 84 } else { 85 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 86 } 87 ierr = VecSet(z,0.);CHKERRQ(ierr); 88 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 89 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 90 if (pcbddc->ChangeOfBasisMatrix) { 91 Vec swap; 92 93 swap = r; 94 r = pcbddc->work_change; 95 pcbddc->work_change = swap; 96 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 97 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 104 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 105 { 106 PCBDDCBenignMatMult_ctx ctx; 107 PetscErrorCode ierr; 108 PetscBool apply_right,apply_left,reset_x; 109 110 PetscFunctionBegin; 111 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 112 if (transpose) { 113 apply_right = ctx->apply_left; 114 apply_left = ctx->apply_right; 115 } else { 116 apply_right = ctx->apply_right; 117 apply_left = ctx->apply_left; 118 } 119 reset_x = PETSC_FALSE; 120 if (apply_right) { 121 const PetscScalar *ax; 122 PetscInt nl,i; 123 124 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 125 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 126 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 128 for (i=0;i<ctx->benign_n;i++) { 129 PetscScalar sum,val; 130 const PetscInt *idxs; 131 PetscInt nz,j; 132 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 133 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 134 sum = 0.; 135 if (ctx->apply_p0) { 136 val = ctx->work[idxs[nz-1]]; 137 for (j=0;j<nz-1;j++) { 138 sum += ctx->work[idxs[j]]; 139 ctx->work[idxs[j]] += val; 140 } 141 } else { 142 for (j=0;j<nz-1;j++) { 143 sum += ctx->work[idxs[j]]; 144 } 145 } 146 ctx->work[idxs[nz-1]] -= sum; 147 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 148 } 149 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 150 reset_x = PETSC_TRUE; 151 } 152 if (transpose) { 153 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 154 } else { 155 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 156 } 157 if (reset_x) { 158 ierr = VecResetArray(x);CHKERRQ(ierr); 159 } 160 if (apply_left) { 161 PetscScalar *ay; 162 PetscInt i; 163 164 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 165 for (i=0;i<ctx->benign_n;i++) { 166 PetscScalar sum,val; 167 const PetscInt *idxs; 168 PetscInt nz,j; 169 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 170 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 171 val = -ay[idxs[nz-1]]; 172 if (ctx->apply_p0) { 173 sum = 0.; 174 for (j=0;j<nz-1;j++) { 175 sum += ay[idxs[j]]; 176 ay[idxs[j]] += val; 177 } 178 ay[idxs[nz-1]] += sum; 179 } else { 180 for (j=0;j<nz-1;j++) { 181 ay[idxs[j]] += val; 182 } 183 ay[idxs[nz-1]] = 0.; 184 } 185 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 186 } 187 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 194 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 195 { 196 PetscErrorCode ierr; 197 198 PetscFunctionBegin; 199 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 205 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 206 { 207 PetscErrorCode ierr; 208 209 PetscFunctionBegin; 210 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "PCBDDCBenignShellMat" 216 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 217 { 218 PC_IS *pcis = (PC_IS*)pc->data; 219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 220 PCBDDCBenignMatMult_ctx ctx; 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 if (!restore) { 225 Mat A_IB,A_BI; 226 PetscScalar *work; 227 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 228 229 if (pcbddc->benign_original_mat) { 230 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 231 } 232 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 233 PetscFunctionReturn(0); 234 } 235 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 237 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 238 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 239 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 240 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 241 ierr = PetscNew(&ctx);CHKERRQ(ierr); 242 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 243 ctx->apply_left = PETSC_TRUE; 244 ctx->apply_right = PETSC_FALSE; 245 ctx->apply_p0 = PETSC_FALSE; 246 ctx->benign_n = pcbddc->benign_n; 247 if (reuse) { 248 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 249 ctx->free = PETSC_FALSE; 250 } else { /* TODO: could be optimized for successive solves */ 251 ISLocalToGlobalMapping N_to_D; 252 PetscInt i; 253 254 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 255 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 256 for (i=0;i<pcbddc->benign_n;i++) { 257 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 258 } 259 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 260 ctx->free = PETSC_TRUE; 261 } 262 ctx->A = pcis->A_IB; 263 ctx->work = work; 264 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 265 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 266 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 267 pcis->A_IB = A_IB; 268 269 /* A_BI as A_IB^T */ 270 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 271 pcbddc->benign_original_mat = pcis->A_BI; 272 pcis->A_BI = A_BI; 273 } else { 274 if (!pcbddc->benign_original_mat) { 275 PetscFunctionReturn(0); 276 } 277 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 278 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 279 pcis->A_IB = ctx->A; 280 ctx->A = NULL; 281 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 282 pcis->A_BI = pcbddc->benign_original_mat; 283 pcbddc->benign_original_mat = NULL; 284 if (ctx->free) { 285 PetscInt i; 286 for (i=0;i<ctx->benign_n;i++) { 287 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 288 } 289 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 290 } 291 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 292 ierr = PetscFree(ctx);CHKERRQ(ierr); 293 } 294 PetscFunctionReturn(0); 295 } 296 297 /* used just in bddc debug mode */ 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCBenignProject" 300 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 301 { 302 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 303 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 304 Mat An; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 309 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 310 if (is1) { 311 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 312 ierr = MatDestroy(&An);CHKERRQ(ierr); 313 } else { 314 *B = An; 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /* TODO: add reuse flag */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSeqAIJCompress" 322 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 323 { 324 Mat Bt; 325 PetscScalar *a,*bdata; 326 const PetscInt *ii,*ij; 327 PetscInt m,n,i,nnz,*bii,*bij; 328 PetscBool flg_row; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 333 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 335 nnz = n; 336 for (i=0;i<ii[n];i++) { 337 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 338 } 339 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 340 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 341 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 342 nnz = 0; 343 bii[0] = 0; 344 for (i=0;i<n;i++) { 345 PetscInt j; 346 for (j=ii[i];j<ii[i+1];j++) { 347 PetscScalar entry = a[j]; 348 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 349 bij[nnz] = ij[j]; 350 bdata[nnz] = entry; 351 nnz++; 352 } 353 } 354 bii[i+1] = nnz; 355 } 356 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 357 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 358 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 359 { 360 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 361 b->free_a = PETSC_TRUE; 362 b->free_ij = PETSC_TRUE; 363 } 364 *B = Bt; 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatDetectDisconnectedComponents" 370 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 371 { 372 Mat B; 373 IS is_dummy,*cc_n; 374 ISLocalToGlobalMapping l2gmap_dummy; 375 PCBDDCGraph graph; 376 PetscInt i,n; 377 PetscInt *xadj,*adjncy; 378 PetscInt *xadj_filtered,*adjncy_filtered; 379 PetscBool flg_row,isseqaij; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 if (!A->rmap->N || !A->cmap->N) { 384 *ncc = 0; 385 *cc = NULL; 386 PetscFunctionReturn(0); 387 } 388 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 389 if (!isseqaij && filter) { 390 PetscBool isseqdense; 391 392 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 393 if (!isseqdense) { 394 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 395 } else { /* TODO: rectangular case and LDA */ 396 PetscScalar *array; 397 PetscReal chop=1.e-6; 398 399 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 400 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 401 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 402 for (i=0;i<n;i++) { 403 PetscInt j; 404 for (j=i+1;j<n;j++) { 405 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 406 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 407 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 408 } 409 } 410 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 411 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 412 } 413 } else { 414 B = A; 415 } 416 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 417 418 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 419 if (filter) { 420 PetscScalar *data; 421 PetscInt j,cum; 422 423 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 424 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 425 cum = 0; 426 for (i=0;i<n;i++) { 427 PetscInt t; 428 429 for (j=xadj[i];j<xadj[i+1];j++) { 430 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 431 continue; 432 } 433 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 434 } 435 t = xadj_filtered[i]; 436 xadj_filtered[i] = cum; 437 cum += t; 438 } 439 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 440 } else { 441 xadj_filtered = NULL; 442 adjncy_filtered = NULL; 443 } 444 445 /* compute local connected components using PCBDDCGraph */ 446 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 447 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 448 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 449 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 450 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 451 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 452 if (xadj_filtered) { 453 graph->xadj = xadj_filtered; 454 graph->adjncy = adjncy_filtered; 455 } else { 456 graph->xadj = xadj; 457 graph->adjncy = adjncy; 458 } 459 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 460 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 461 /* partial clean up */ 462 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 463 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 464 if (A != B) { 465 ierr = MatDestroy(&B);CHKERRQ(ierr); 466 } 467 468 /* get back data */ 469 if (ncc) *ncc = graph->ncc; 470 if (cc) { 471 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 472 for (i=0;i<graph->ncc;i++) { 473 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 474 } 475 *cc = cc_n; 476 } 477 /* clean up graph */ 478 graph->xadj = 0; 479 graph->adjncy = 0; 480 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCBenignCheck" 486 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 487 { 488 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 489 PC_IS* pcis = (PC_IS*)(pc->data); 490 IS dirIS = NULL; 491 PetscInt i; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 496 if (zerodiag) { 497 Mat A; 498 Vec vec3_N; 499 PetscScalar *vals; 500 const PetscInt *idxs; 501 PetscInt nz,*count; 502 503 /* p0 */ 504 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 505 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 507 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 508 for (i=0;i<nz;i++) vals[i] = 1.; 509 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 512 /* v_I */ 513 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 514 for (i=0;i<nz;i++) vals[i] = 0.; 515 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 517 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 519 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 520 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 521 if (dirIS) { 522 PetscInt n; 523 524 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 525 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 526 for (i=0;i<n;i++) vals[i] = 0.; 527 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 528 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 532 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 533 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 534 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 535 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 536 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 537 if (PetscAbsScalar(vals[0]) > 1.e-1) { 538 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 539 } 540 ierr = PetscFree(vals);CHKERRQ(ierr); 541 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 542 543 /* there should not be any pressure dofs lying on the interface */ 544 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 545 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 546 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 547 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 548 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 549 for (i=0;i<nz;i++) { 550 if (count[idxs[i]]) { 551 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 552 } 553 } 554 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 555 ierr = PetscFree(count);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 558 559 /* check PCBDDCBenignGetOrSetP0 */ 560 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 561 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 562 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 563 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 564 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 565 for (i=0;i<pcbddc->benign_n;i++) { 566 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 567 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 575 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 576 { 577 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 578 IS pressures,zerodiag,*zerodiag_subs; 579 PetscInt nz,n; 580 PetscInt *interior_dofs,n_interior_dofs; 581 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 582 PetscErrorCode ierr; 583 584 PetscFunctionBegin; 585 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 586 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 587 for (n=0;n<pcbddc->benign_n;n++) { 588 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 589 } 590 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 591 pcbddc->benign_n = 0; 592 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 593 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 594 Checks if all the pressure dofs in each subdomain have a zero diagonal 595 If not, a change of basis on pressures is not needed 596 since the local Schur complements are SPD 597 */ 598 has_null_pressures = PETSC_TRUE; 599 have_null = PETSC_TRUE; 600 if (pcbddc->n_ISForDofsLocal) { 601 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 602 603 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 604 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 605 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 607 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 608 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 609 if (!sorted) { 610 ierr = ISSort(pressures);CHKERRQ(ierr); 611 } 612 } else { 613 pressures = NULL; 614 } 615 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 616 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 617 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 618 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 619 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 620 if (!sorted) { 621 ierr = ISSort(zerodiag);CHKERRQ(ierr); 622 } 623 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 624 if (!nz) { 625 if (n) have_null = PETSC_FALSE; 626 has_null_pressures = PETSC_FALSE; 627 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 628 } 629 recompute_zerodiag = PETSC_FALSE; 630 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 631 zerodiag_subs = NULL; 632 pcbddc->benign_n = 0; 633 n_interior_dofs = 0; 634 interior_dofs = NULL; 635 if (pcbddc->current_level) { /* need to compute interior nodes */ 636 PetscInt n,i,j; 637 PetscInt n_neigh,*neigh,*n_shared,**shared; 638 PetscInt *iwork; 639 640 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 641 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 642 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 643 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 644 for (i=0;i<n_neigh;i++) 645 for (j=0;j<n_shared[i];j++) 646 iwork[shared[i][j]] += 1; 647 for (i=0;i<n;i++) 648 if (!iwork[i]) 649 interior_dofs[n_interior_dofs++] = i; 650 ierr = PetscFree(iwork);CHKERRQ(ierr); 651 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 652 } 653 if (has_null_pressures) { 654 IS *subs; 655 PetscInt nsubs,i,j,nl; 656 const PetscInt *idxs; 657 PetscScalar *array; 658 Vec *work; 659 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 660 661 subs = pcbddc->local_subs; 662 nsubs = pcbddc->n_local_subs; 663 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 664 if (pcbddc->current_level) { 665 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 666 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 667 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 668 /* work[0] = 1_p */ 669 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 670 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 671 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 672 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 673 /* work[0] = 1_v */ 674 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 675 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 676 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 677 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 679 } 680 if (nsubs > 1) { 681 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 682 for (i=0;i<nsubs;i++) { 683 ISLocalToGlobalMapping l2g; 684 IS t_zerodiag_subs; 685 PetscInt nl; 686 687 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 688 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 689 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 690 if (nl) { 691 PetscBool valid = PETSC_TRUE; 692 693 if (pcbddc->current_level) { 694 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 695 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 696 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 697 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 698 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 699 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 701 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 702 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 703 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 704 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 705 for (j=0;j<n_interior_dofs;j++) { 706 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 707 valid = PETSC_FALSE; 708 break; 709 } 710 } 711 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 712 } 713 if (valid && pcbddc->NeumannBoundariesLocal) { 714 IS t_bc; 715 PetscInt nzb; 716 717 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 718 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 719 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 720 if (nzb) valid = PETSC_FALSE; 721 } 722 if (valid && pressures) { 723 IS t_pressure_subs; 724 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 725 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 726 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 727 } 728 if (valid) { 729 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 730 pcbddc->benign_n++; 731 } else { 732 recompute_zerodiag = PETSC_TRUE; 733 } 734 } 735 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 736 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 737 } 738 } else { /* there's just one subdomain (or zero if they have not been detected */ 739 PetscBool valid = PETSC_TRUE; 740 741 if (pcbddc->NeumannBoundariesLocal) { 742 PetscInt nzb; 743 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 744 if (nzb) valid = PETSC_FALSE; 745 } 746 if (valid && pressures) { 747 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 748 } 749 if (valid && pcbddc->current_level) { 750 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 751 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 752 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 753 for (j=0;j<n_interior_dofs;j++) { 754 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 755 valid = PETSC_FALSE; 756 break; 757 } 758 } 759 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 760 } 761 if (valid) { 762 pcbddc->benign_n = 1; 763 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 764 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 765 zerodiag_subs[0] = zerodiag; 766 } 767 } 768 if (pcbddc->current_level) { 769 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 770 } 771 } 772 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 773 774 if (!pcbddc->benign_n) { 775 PetscInt n; 776 777 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 778 recompute_zerodiag = PETSC_FALSE; 779 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 780 if (n) { 781 has_null_pressures = PETSC_FALSE; 782 have_null = PETSC_FALSE; 783 } 784 } 785 786 /* final check for null pressures */ 787 if (zerodiag && pressures) { 788 PetscInt nz,np; 789 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 790 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 791 if (nz != np) have_null = PETSC_FALSE; 792 } 793 794 if (recompute_zerodiag) { 795 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 796 if (pcbddc->benign_n == 1) { 797 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 798 zerodiag = zerodiag_subs[0]; 799 } else { 800 PetscInt i,nzn,*new_idxs; 801 802 nzn = 0; 803 for (i=0;i<pcbddc->benign_n;i++) { 804 PetscInt ns; 805 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 806 nzn += ns; 807 } 808 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 809 nzn = 0; 810 for (i=0;i<pcbddc->benign_n;i++) { 811 PetscInt ns,*idxs; 812 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 813 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 814 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 815 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 816 nzn += ns; 817 } 818 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 819 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 820 } 821 have_null = PETSC_FALSE; 822 } 823 824 /* no-net-flux */ 825 if (pcbddc->benign_compute_nonetflux) { 826 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 827 MatNullSpace near_null_space; 828 ISLocalToGlobalMapping rmap; 829 Vec quad_vec; 830 PetscScalar *pvals; 831 PetscInt i,np,*dummyins; 832 IS isused = NULL; 833 PetscBool participate = PETSC_TRUE; 834 835 /* create vector to hold quadrature weights */ 836 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 837 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 838 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 839 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 840 841 /* compute B^{(i)T} * 1_p */ 842 np = 0; 843 if (pressures) { 844 isused = pressures; 845 } else { 846 isused = zerodiag; 847 } 848 if (isused) { 849 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 850 } 851 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 852 for (i=0;i<np;i++) pvals[i] = 1.; 853 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 854 if (np) { 855 const PetscInt *pidxs; 856 857 if (isused) { 858 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 859 } 860 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 861 if (isused) { 862 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 863 } 864 } 865 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 866 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 867 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 868 ierr = PetscFree(pvals);CHKERRQ(ierr); 869 if (!isused) participate = PETSC_FALSE; 870 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 871 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 872 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 873 for (i=0;i<matis->sf->nleaves;i++) 874 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 875 else matis->sf_leafdata[i] = -1; 876 877 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 878 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 879 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 880 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 881 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 882 for (i=0;i<matis->sf->nleaves;i++) { 883 if (PetscGlobalRank != matis->sf_leafdata[i]) { 884 pvals[i] = 0.; 885 } 886 } 887 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 888 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 889 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 890 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 891 ierr = PetscFree(dummyins);CHKERRQ(ierr); 892 893 /* assembly quadrature vec and attach near null space to pmat */ 894 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 895 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 896 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 897 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 898 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 899 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 900 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 901 } 902 903 /* change of basis and p0 dofs */ 904 if (has_null_pressures) { 905 IS zerodiagc; 906 const PetscInt *idxs,*idxsc; 907 PetscInt i,s,*nnz; 908 909 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 910 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 911 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 912 /* local change of basis for pressures */ 913 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 914 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 915 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 916 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 917 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 918 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 919 for (i=0;i<pcbddc->benign_n;i++) { 920 PetscInt nzs,j; 921 922 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 923 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 925 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 926 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 927 } 928 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 929 ierr = PetscFree(nnz);CHKERRQ(ierr); 930 /* set identity on velocities */ 931 for (i=0;i<n-nz;i++) { 932 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 933 } 934 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 935 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 936 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 937 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 938 /* set change on pressures */ 939 for (s=0;s<pcbddc->benign_n;s++) { 940 PetscScalar *array; 941 PetscInt nzs; 942 943 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 944 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 945 for (i=0;i<nzs-1;i++) { 946 PetscScalar vals[2]; 947 PetscInt cols[2]; 948 949 cols[0] = idxs[i]; 950 cols[1] = idxs[nzs-1]; 951 vals[0] = 1.; 952 vals[1] = 1.; 953 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 954 } 955 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 956 for (i=0;i<nzs-1;i++) array[i] = -1.; 957 array[nzs-1] = 1.; 958 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 959 /* store local idxs for p0 */ 960 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 961 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 962 ierr = PetscFree(array);CHKERRQ(ierr); 963 } 964 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 /* project if needed */ 967 if (pcbddc->benign_change_explicit) { 968 Mat M; 969 970 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 971 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 972 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 973 ierr = MatDestroy(&M);CHKERRQ(ierr); 974 } 975 /* store global idxs for p0 */ 976 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 977 } 978 pcbddc->benign_zerodiag_subs = zerodiag_subs; 979 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 980 981 /* determines if the coarse solver will be singular or not */ 982 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 983 /* determines if the problem has subdomains with 0 pressure block */ 984 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 985 *zerodiaglocal = zerodiag; 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 991 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 992 { 993 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 994 PetscScalar *array; 995 PetscErrorCode ierr; 996 997 PetscFunctionBegin; 998 if (!pcbddc->benign_sf) { 999 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1000 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1001 } 1002 if (get) { 1003 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1005 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1007 } else { 1008 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1009 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1010 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1018 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1019 { 1020 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 /* TODO: add error checking 1025 - avoid nested pop (or push) calls. 1026 - cannot push before pop. 1027 - cannot call this if pcbddc->local_mat is NULL 1028 */ 1029 if (!pcbddc->benign_n) { 1030 PetscFunctionReturn(0); 1031 } 1032 if (pop) { 1033 if (pcbddc->benign_change_explicit) { 1034 IS is_p0; 1035 MatReuse reuse; 1036 1037 /* extract B_0 */ 1038 reuse = MAT_INITIAL_MATRIX; 1039 if (pcbddc->benign_B0) { 1040 reuse = MAT_REUSE_MATRIX; 1041 } 1042 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1043 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1044 /* remove rows and cols from local problem */ 1045 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1046 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1047 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1048 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1049 } else { 1050 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1051 PetscScalar *vals; 1052 PetscInt i,n,*idxs_ins; 1053 1054 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1055 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1056 if (!pcbddc->benign_B0) { 1057 PetscInt *nnz; 1058 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1059 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1060 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1061 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1062 for (i=0;i<pcbddc->benign_n;i++) { 1063 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1064 nnz[i] = n - nnz[i]; 1065 } 1066 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1067 ierr = PetscFree(nnz);CHKERRQ(ierr); 1068 } 1069 1070 for (i=0;i<pcbddc->benign_n;i++) { 1071 PetscScalar *array; 1072 PetscInt *idxs,j,nz,cum; 1073 1074 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1075 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1076 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1077 for (j=0;j<nz;j++) vals[j] = 1.; 1078 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1079 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1080 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1081 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1082 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1083 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1084 cum = 0; 1085 for (j=0;j<n;j++) { 1086 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1087 vals[cum] = array[j]; 1088 idxs_ins[cum] = j; 1089 cum++; 1090 } 1091 } 1092 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1093 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1094 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1095 } 1096 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1097 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1098 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1099 } 1100 } else { /* push */ 1101 if (pcbddc->benign_change_explicit) { 1102 PetscInt i; 1103 1104 for (i=0;i<pcbddc->benign_n;i++) { 1105 PetscScalar *B0_vals; 1106 PetscInt *B0_cols,B0_ncol; 1107 1108 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1109 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1110 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1111 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1112 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1113 } 1114 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116 } else { 1117 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1125 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1126 { 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1129 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1130 PetscBLASInt *B_iwork,*B_ifail; 1131 PetscScalar *work,lwork; 1132 PetscScalar *St,*S,*eigv; 1133 PetscScalar *Sarray,*Starray; 1134 PetscReal *eigs,thresh; 1135 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1136 PetscBool allocated_S_St; 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscReal *rwork; 1139 #endif 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1144 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1145 1146 if (pcbddc->dbg_flag) { 1147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1151 } 1152 1153 if (pcbddc->dbg_flag) { 1154 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1155 } 1156 1157 /* max size of subsets */ 1158 mss = 0; 1159 for (i=0;i<sub_schurs->n_subs;i++) { 1160 PetscInt subset_size; 1161 1162 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1163 mss = PetscMax(mss,subset_size); 1164 } 1165 1166 /* min/max and threshold */ 1167 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1168 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1169 nmax = PetscMax(nmin,nmax); 1170 allocated_S_St = PETSC_FALSE; 1171 if (nmin) { 1172 allocated_S_St = PETSC_TRUE; 1173 } 1174 1175 /* allocate lapack workspace */ 1176 cum = cum2 = 0; 1177 maxneigs = 0; 1178 for (i=0;i<sub_schurs->n_subs;i++) { 1179 PetscInt n,subset_size; 1180 1181 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1182 n = PetscMin(subset_size,nmax); 1183 cum += subset_size; 1184 cum2 += subset_size*n; 1185 maxneigs = PetscMax(maxneigs,n); 1186 } 1187 if (mss) { 1188 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1189 PetscBLASInt B_itype = 1; 1190 PetscBLASInt B_N = mss; 1191 PetscReal zero = 0.0; 1192 PetscReal eps = 0.0; /* dlamch? */ 1193 1194 B_lwork = -1; 1195 S = NULL; 1196 St = NULL; 1197 eigs = NULL; 1198 eigv = NULL; 1199 B_iwork = NULL; 1200 B_ifail = NULL; 1201 #if defined(PETSC_USE_COMPLEX) 1202 rwork = NULL; 1203 #endif 1204 thresh = 1.0; 1205 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1206 #if defined(PETSC_USE_COMPLEX) 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1208 #else 1209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1210 #endif 1211 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1212 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1213 } else { 1214 /* TODO */ 1215 } 1216 } else { 1217 lwork = 0; 1218 } 1219 1220 nv = 0; 1221 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1222 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1223 } 1224 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1225 if (allocated_S_St) { 1226 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1227 } 1228 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1229 #if defined(PETSC_USE_COMPLEX) 1230 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1231 #endif 1232 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1233 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1234 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1235 nv+cum,&pcbddc->adaptive_constraints_idxs, 1236 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1237 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1238 1239 maxneigs = 0; 1240 cum = cumarray = 0; 1241 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1242 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1243 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1244 const PetscInt *idxs; 1245 1246 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1247 for (cum=0;cum<nv;cum++) { 1248 pcbddc->adaptive_constraints_n[cum] = 1; 1249 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1250 pcbddc->adaptive_constraints_data[cum] = 1.0; 1251 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1252 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1253 } 1254 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1255 } 1256 1257 if (mss) { /* multilevel */ 1258 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1259 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1260 } 1261 1262 thresh = pcbddc->adaptive_threshold; 1263 for (i=0;i<sub_schurs->n_subs;i++) { 1264 const PetscInt *idxs; 1265 PetscReal upper,lower; 1266 PetscInt j,subset_size,eigs_start = 0; 1267 PetscBLASInt B_N; 1268 PetscBool same_data = PETSC_FALSE; 1269 1270 if (pcbddc->use_deluxe_scaling) { 1271 upper = PETSC_MAX_REAL; 1272 lower = thresh; 1273 } else { 1274 upper = 1./thresh; 1275 lower = 0.; 1276 } 1277 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1278 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1279 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1280 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1281 if (sub_schurs->is_hermitian) { 1282 PetscInt j,k; 1283 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1284 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1285 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1286 } 1287 for (j=0;j<subset_size;j++) { 1288 for (k=j;k<subset_size;k++) { 1289 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1290 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1291 } 1292 } 1293 } else { 1294 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1295 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1296 } 1297 } else { 1298 S = Sarray + cumarray; 1299 St = Starray + cumarray; 1300 } 1301 /* see if we can save some work */ 1302 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1303 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1304 } 1305 1306 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1307 B_neigs = 0; 1308 } else { 1309 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1310 PetscBLASInt B_itype = 1; 1311 PetscBLASInt B_IL, B_IU; 1312 PetscReal eps = -1.0; /* dlamch? */ 1313 PetscInt nmin_s; 1314 PetscBool compute_range = PETSC_FALSE; 1315 1316 if (pcbddc->dbg_flag) { 1317 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1318 } 1319 1320 compute_range = PETSC_FALSE; 1321 if (thresh > 1.+PETSC_SMALL && !same_data) { 1322 compute_range = PETSC_TRUE; 1323 } 1324 1325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1326 if (compute_range) { 1327 1328 /* ask for eigenvalues larger than thresh */ 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1331 #else 1332 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1333 #endif 1334 } else if (!same_data) { 1335 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1336 B_IL = 1; 1337 #if defined(PETSC_USE_COMPLEX) 1338 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1339 #else 1340 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1341 #endif 1342 } else { /* same_data is true, so get the adaptive function requested by the user */ 1343 PetscInt k; 1344 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1345 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1346 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1347 nmin = nmax; 1348 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1349 for (k=0;k<nmax;k++) { 1350 eigs[k] = 1./PETSC_SMALL; 1351 eigv[k*(subset_size+1)] = 1.0; 1352 } 1353 } 1354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1355 if (B_ierr) { 1356 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1357 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1358 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1359 } 1360 1361 if (B_neigs > nmax) { 1362 if (pcbddc->dbg_flag) { 1363 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1364 } 1365 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1366 B_neigs = nmax; 1367 } 1368 1369 nmin_s = PetscMin(nmin,B_N); 1370 if (B_neigs < nmin_s) { 1371 PetscBLASInt B_neigs2; 1372 1373 if (pcbddc->use_deluxe_scaling) { 1374 B_IL = B_N - nmin_s + 1; 1375 B_IU = B_N - B_neigs; 1376 } else { 1377 B_IL = B_neigs + 1; 1378 B_IU = nmin_s; 1379 } 1380 if (pcbddc->dbg_flag) { 1381 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1382 } 1383 if (sub_schurs->is_hermitian) { 1384 PetscInt j; 1385 for (j=0;j<subset_size;j++) { 1386 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1387 } 1388 for (j=0;j<subset_size;j++) { 1389 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1390 } 1391 } else { 1392 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1393 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 } 1395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1396 #if defined(PETSC_USE_COMPLEX) 1397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1398 #else 1399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1400 #endif 1401 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1402 B_neigs += B_neigs2; 1403 } 1404 if (B_ierr) { 1405 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1406 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1407 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1408 } 1409 if (pcbddc->dbg_flag) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1411 for (j=0;j<B_neigs;j++) { 1412 if (eigs[j] == 0.0) { 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1414 } else { 1415 if (pcbddc->use_deluxe_scaling) { 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1419 } 1420 } 1421 } 1422 } 1423 } else { 1424 /* TODO */ 1425 } 1426 } 1427 /* change the basis back to the original one */ 1428 if (sub_schurs->change) { 1429 Mat change,phi,phit; 1430 1431 if (pcbddc->dbg_flag > 1) { 1432 PetscInt ii; 1433 for (ii=0;ii<B_neigs;ii++) { 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1435 for (j=0;j<B_N;j++) { 1436 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1437 } 1438 } 1439 } 1440 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1442 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1443 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1444 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1445 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1446 } 1447 maxneigs = PetscMax(B_neigs,maxneigs); 1448 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1449 if (B_neigs) { 1450 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1451 1452 if (pcbddc->dbg_flag > 1) { 1453 PetscInt ii; 1454 for (ii=0;ii<B_neigs;ii++) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1456 for (j=0;j<B_N;j++) { 1457 #if defined(PETSC_USE_COMPLEX) 1458 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1459 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1461 #else 1462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1463 #endif 1464 } 1465 } 1466 } 1467 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1468 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1469 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1470 cum++; 1471 } 1472 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1473 /* shift for next computation */ 1474 cumarray += subset_size*subset_size; 1475 } 1476 if (pcbddc->dbg_flag) { 1477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1478 } 1479 1480 if (mss) { 1481 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1482 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1483 /* destroy matrices (junk) */ 1484 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1485 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1486 } 1487 if (allocated_S_St) { 1488 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1489 } 1490 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1491 #if defined(PETSC_USE_COMPLEX) 1492 ierr = PetscFree(rwork);CHKERRQ(ierr); 1493 #endif 1494 if (pcbddc->dbg_flag) { 1495 PetscInt maxneigs_r; 1496 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1497 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "PCBDDCSetUpSolvers" 1504 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1505 { 1506 PetscScalar *coarse_submat_vals; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1511 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1512 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1513 1514 /* Setup local neumann solver ksp_R */ 1515 /* PCBDDCSetUpLocalScatters should be called first! */ 1516 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1517 1518 /* 1519 Setup local correction and local part of coarse basis. 1520 Gives back the dense local part of the coarse matrix in column major ordering 1521 */ 1522 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1523 1524 /* Compute total number of coarse nodes and setup coarse solver */ 1525 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1526 1527 /* free */ 1528 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCResetCustomization" 1534 PetscErrorCode PCBDDCResetCustomization(PC pc) 1535 { 1536 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1541 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1542 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1543 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1544 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1545 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1546 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1547 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1548 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1549 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1550 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "PCBDDCResetTopography" 1556 PetscErrorCode PCBDDCResetTopography(PC pc) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscInt i; 1560 PetscErrorCode ierr; 1561 1562 PetscFunctionBegin; 1563 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1564 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1565 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1566 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1567 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1568 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1569 for (i=0;i<pcbddc->n_local_subs;i++) { 1570 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1571 } 1572 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1573 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "PCBDDCResetSolvers" 1579 PetscErrorCode PCBDDCResetSolvers(PC pc) 1580 { 1581 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1586 if (pcbddc->coarse_phi_B) { 1587 PetscScalar *array; 1588 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1589 ierr = PetscFree(array);CHKERRQ(ierr); 1590 } 1591 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1592 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1593 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1595 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1596 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1597 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1600 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1601 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1602 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1603 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1604 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1605 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1606 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1607 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1610 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1611 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1612 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1613 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1614 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1615 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1616 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1618 if (pcbddc->benign_zerodiag_subs) { 1619 PetscInt i; 1620 for (i=0;i<pcbddc->benign_n;i++) { 1621 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1622 } 1623 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1624 } 1625 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1631 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1632 { 1633 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1634 PC_IS *pcis = (PC_IS*)pc->data; 1635 VecType impVecType; 1636 PetscInt n_constraints,n_R,old_size; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1641 /* get sizes */ 1642 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1643 n_R = pcis->n - pcbddc->n_vertices; 1644 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1645 /* local work vectors (try to avoid unneeded work)*/ 1646 /* R nodes */ 1647 old_size = -1; 1648 if (pcbddc->vec1_R) { 1649 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1650 } 1651 if (n_R != old_size) { 1652 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1653 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1654 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1655 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1656 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1657 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 /* local primal dofs */ 1660 old_size = -1; 1661 if (pcbddc->vec1_P) { 1662 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1663 } 1664 if (pcbddc->local_primal_size != old_size) { 1665 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1666 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1667 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1668 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1669 } 1670 /* local explicit constraints */ 1671 old_size = -1; 1672 if (pcbddc->vec1_C) { 1673 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1674 } 1675 if (n_constraints && n_constraints != old_size) { 1676 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1677 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1678 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1679 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "PCBDDCSetUpCorrection" 1686 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1687 { 1688 PetscErrorCode ierr; 1689 /* pointers to pcis and pcbddc */ 1690 PC_IS* pcis = (PC_IS*)pc->data; 1691 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1692 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1693 /* submatrices of local problem */ 1694 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1695 /* submatrices of local coarse problem */ 1696 Mat S_VV,S_CV,S_VC,S_CC; 1697 /* working matrices */ 1698 Mat C_CR; 1699 /* additional working stuff */ 1700 PC pc_R; 1701 Mat F; 1702 Vec dummy_vec; 1703 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1704 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1705 PetscScalar *work; 1706 PetscInt *idx_V_B; 1707 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1708 PetscInt i,n_R,n_D,n_B; 1709 1710 /* some shortcuts to scalars */ 1711 PetscScalar one=1.0,m_one=-1.0; 1712 1713 PetscFunctionBegin; 1714 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1716 } 1717 1718 /* Set Non-overlapping dimensions */ 1719 n_vertices = pcbddc->n_vertices; 1720 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1721 n_B = pcis->n_B; 1722 n_D = pcis->n - n_B; 1723 n_R = pcis->n - n_vertices; 1724 1725 /* vertices in boundary numbering */ 1726 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1727 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1728 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1729 1730 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1731 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1732 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1733 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1735 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1737 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1739 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1740 1741 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1742 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1743 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1744 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1745 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1746 lda_rhs = n_R; 1747 need_benign_correction = PETSC_FALSE; 1748 if (isLU || isILU || isCHOL) { 1749 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1750 } else if (sub_schurs->reuse_solver) { 1751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1752 MatFactorType type; 1753 1754 F = reuse_solver->F; 1755 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1756 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1757 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1758 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1759 } else { 1760 F = NULL; 1761 } 1762 1763 /* allocate workspace */ 1764 n = 0; 1765 if (n_constraints) { 1766 n += lda_rhs*n_constraints; 1767 } 1768 if (n_vertices) { 1769 n = PetscMax(2*lda_rhs*n_vertices,n); 1770 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1771 } 1772 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1773 1774 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1775 dummy_vec = NULL; 1776 if (need_benign_correction && lda_rhs != n_R && F) { 1777 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1778 } 1779 1780 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1781 if (n_constraints) { 1782 Mat M1,M2,M3,C_B; 1783 IS is_aux; 1784 PetscScalar *array,*array2; 1785 1786 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1787 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1788 1789 /* Extract constraints on R nodes: C_{CR} */ 1790 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1791 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1792 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1793 1794 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1795 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1796 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1797 for (i=0;i<n_constraints;i++) { 1798 const PetscScalar *row_cmat_values; 1799 const PetscInt *row_cmat_indices; 1800 PetscInt size_of_constraint,j; 1801 1802 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1803 for (j=0;j<size_of_constraint;j++) { 1804 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1805 } 1806 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1807 } 1808 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1809 if (F) { 1810 Mat B; 1811 1812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1815 1816 /* rhs is already zero on interior dofs, no need to change the rhs */ 1817 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1818 } 1819 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1820 if (need_benign_correction) { 1821 PetscScalar *marr; 1822 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1823 1824 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1825 if (lda_rhs != n_R) { 1826 for (i=0;i<n_constraints;i++) { 1827 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1828 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1829 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1830 } 1831 } else { 1832 for (i=0;i<n_constraints;i++) { 1833 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1834 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1835 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1836 } 1837 } 1838 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1839 } 1840 ierr = MatDestroy(&B);CHKERRQ(ierr); 1841 } else { 1842 PetscScalar *marr; 1843 1844 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1845 for (i=0;i<n_constraints;i++) { 1846 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1847 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1848 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1849 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1850 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1851 } 1852 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1853 } 1854 if (!pcbddc->switch_static) { 1855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1856 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1857 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1858 for (i=0;i<n_constraints;i++) { 1859 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1860 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1864 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1865 } 1866 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1867 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1868 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1869 } else { 1870 if (lda_rhs != n_R) { 1871 IS dummy; 1872 1873 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1874 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1875 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1876 } else { 1877 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1878 pcbddc->local_auxmat2 = local_auxmat2_R; 1879 } 1880 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1881 } 1882 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1883 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1884 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1885 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1886 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1887 if (isCHOL) { 1888 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1891 } 1892 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1893 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1894 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1895 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1896 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1897 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1898 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1900 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1901 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1902 } 1903 1904 /* Get submatrices from subdomain matrix */ 1905 if (n_vertices) { 1906 IS is_aux; 1907 1908 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1909 IS tis; 1910 1911 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1912 ierr = ISSort(tis);CHKERRQ(ierr); 1913 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1914 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1915 } else { 1916 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1917 } 1918 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1919 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1920 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1921 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1922 } 1923 1924 /* Matrix of coarse basis functions (local) */ 1925 if (pcbddc->coarse_phi_B) { 1926 PetscInt on_B,on_primal,on_D=n_D; 1927 if (pcbddc->coarse_phi_D) { 1928 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1929 } 1930 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1931 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1932 PetscScalar *marray; 1933 1934 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1935 ierr = PetscFree(marray);CHKERRQ(ierr); 1936 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1937 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1938 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1939 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1940 } 1941 } 1942 1943 if (!pcbddc->coarse_phi_B) { 1944 PetscScalar *marray; 1945 1946 n = n_B*pcbddc->local_primal_size; 1947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1948 n += n_D*pcbddc->local_primal_size; 1949 } 1950 if (!pcbddc->symmetric_primal) { 1951 n *= 2; 1952 } 1953 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1955 n = n_B*pcbddc->local_primal_size; 1956 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1958 n += n_D*pcbddc->local_primal_size; 1959 } 1960 if (!pcbddc->symmetric_primal) { 1961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1962 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1963 n = n_B*pcbddc->local_primal_size; 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1965 } 1966 } else { 1967 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1968 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1969 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1970 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1971 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1972 } 1973 } 1974 } 1975 1976 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1977 p0_lidx_I = NULL; 1978 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1979 const PetscInt *idxs; 1980 1981 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1982 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1983 for (i=0;i<pcbddc->benign_n;i++) { 1984 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1985 } 1986 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1987 } 1988 1989 /* vertices */ 1990 if (n_vertices) { 1991 1992 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1993 1994 if (n_R) { 1995 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1996 PetscBLASInt B_N,B_one = 1; 1997 PetscScalar *x,*y; 1998 PetscBool isseqaij; 1999 2000 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2001 if (need_benign_correction) { 2002 ISLocalToGlobalMapping RtoN; 2003 IS is_p0; 2004 PetscInt *idxs_p0,n; 2005 2006 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2007 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2008 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2009 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2010 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2012 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2013 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2014 } 2015 2016 if (lda_rhs == n_R) { 2017 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2018 } else { 2019 PetscScalar *av,*array; 2020 const PetscInt *xadj,*adjncy; 2021 PetscInt n; 2022 PetscBool flg_row; 2023 2024 array = work+lda_rhs*n_vertices; 2025 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2026 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2027 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2028 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2029 for (i=0;i<n;i++) { 2030 PetscInt j; 2031 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2032 } 2033 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2034 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2035 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2036 } 2037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2038 if (need_benign_correction) { 2039 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2040 PetscScalar *marr; 2041 2042 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2043 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2044 2045 | 0 0 0 | (V) 2046 L = | 0 0 -1 | (P-p0) 2047 | 0 0 -1 | (p0) 2048 2049 */ 2050 for (i=0;i<reuse_solver->benign_n;i++) { 2051 const PetscScalar *vals; 2052 const PetscInt *idxs,*idxs_zero; 2053 PetscInt n,j,nz; 2054 2055 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2056 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2057 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2058 for (j=0;j<n;j++) { 2059 PetscScalar val = vals[j]; 2060 PetscInt k,col = idxs[j]; 2061 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2062 } 2063 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2064 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2065 } 2066 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2067 } 2068 if (F) { 2069 /* need to correct the rhs */ 2070 if (need_benign_correction) { 2071 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2072 PetscScalar *marr; 2073 2074 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2075 if (lda_rhs != n_R) { 2076 for (i=0;i<n_vertices;i++) { 2077 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2078 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2079 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2080 } 2081 } else { 2082 for (i=0;i<n_vertices;i++) { 2083 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2084 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2089 } 2090 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2091 /* need to correct the solution */ 2092 if (need_benign_correction) { 2093 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2094 PetscScalar *marr; 2095 2096 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2097 if (lda_rhs != n_R) { 2098 for (i=0;i<n_vertices;i++) { 2099 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2100 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2101 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2102 } 2103 } else { 2104 for (i=0;i<n_vertices;i++) { 2105 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2106 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2107 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2108 } 2109 } 2110 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2111 } 2112 } else { 2113 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2114 for (i=0;i<n_vertices;i++) { 2115 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2116 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2117 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2118 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2119 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2120 } 2121 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2122 } 2123 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2124 /* S_VV and S_CV */ 2125 if (n_constraints) { 2126 Mat B; 2127 2128 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2129 for (i=0;i<n_vertices;i++) { 2130 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2131 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2132 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2134 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2136 } 2137 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2138 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2139 ierr = MatDestroy(&B);CHKERRQ(ierr); 2140 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2141 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2142 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2144 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2145 ierr = MatDestroy(&B);CHKERRQ(ierr); 2146 } 2147 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2148 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2149 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2150 } 2151 if (lda_rhs != n_R) { 2152 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2154 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2155 } 2156 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2157 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2158 if (need_benign_correction) { 2159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2160 PetscScalar *marr,*sums; 2161 2162 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2163 ierr = MatDenseGetArray(S_VVt,&marr); 2164 for (i=0;i<reuse_solver->benign_n;i++) { 2165 const PetscScalar *vals; 2166 const PetscInt *idxs,*idxs_zero; 2167 PetscInt n,j,nz; 2168 2169 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2170 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2171 for (j=0;j<n_vertices;j++) { 2172 PetscInt k; 2173 sums[j] = 0.; 2174 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2175 } 2176 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2177 for (j=0;j<n;j++) { 2178 PetscScalar val = vals[j]; 2179 PetscInt k; 2180 for (k=0;k<n_vertices;k++) { 2181 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2182 } 2183 } 2184 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2185 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(sums);CHKERRQ(ierr); 2188 ierr = MatDenseRestoreArray(S_VVt,&marr); 2189 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2190 } 2191 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2193 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2195 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2196 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2197 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2198 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2199 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2200 } else { 2201 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2202 } 2203 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2204 2205 /* coarse basis functions */ 2206 for (i=0;i<n_vertices;i++) { 2207 PetscScalar *y; 2208 2209 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2210 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2211 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2212 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2213 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2214 y[n_B*i+idx_V_B[i]] = 1.0; 2215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2216 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2217 2218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2219 PetscInt j; 2220 2221 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2222 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2223 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2224 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2225 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2226 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2227 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2228 } 2229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2230 } 2231 /* if n_R == 0 the object is not destroyed */ 2232 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2233 } 2234 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2235 2236 if (n_constraints) { 2237 Mat B; 2238 2239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2240 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2241 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2242 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2243 if (n_vertices) { 2244 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2245 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2246 } else { 2247 Mat S_VCt; 2248 2249 if (lda_rhs != n_R) { 2250 ierr = MatDestroy(&B);CHKERRQ(ierr); 2251 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2252 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2253 } 2254 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2255 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2256 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2257 } 2258 } 2259 ierr = MatDestroy(&B);CHKERRQ(ierr); 2260 /* coarse basis functions */ 2261 for (i=0;i<n_constraints;i++) { 2262 PetscScalar *y; 2263 2264 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2265 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2266 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2267 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2268 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2269 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2270 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2271 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2272 PetscInt j; 2273 2274 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2275 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2279 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2280 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2281 } 2282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2283 } 2284 } 2285 if (n_constraints) { 2286 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2287 } 2288 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2289 2290 /* coarse matrix entries relative to B_0 */ 2291 if (pcbddc->benign_n) { 2292 Mat B0_B,B0_BPHI; 2293 IS is_dummy; 2294 PetscScalar *data; 2295 PetscInt j; 2296 2297 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2299 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2300 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2301 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2303 for (j=0;j<pcbddc->benign_n;j++) { 2304 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2305 for (i=0;i<pcbddc->local_primal_size;i++) { 2306 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2307 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2308 } 2309 } 2310 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2311 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2313 } 2314 2315 /* compute other basis functions for non-symmetric problems */ 2316 if (!pcbddc->symmetric_primal) { 2317 Mat B_V=NULL,B_C=NULL; 2318 PetscScalar *marray; 2319 2320 if (n_constraints) { 2321 Mat S_CCT,C_CRT; 2322 2323 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2324 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2325 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2326 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2327 if (n_vertices) { 2328 Mat S_VCT; 2329 2330 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2331 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2332 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2333 } 2334 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2335 } 2336 if (n_vertices && n_R) { 2337 PetscScalar *av,*marray; 2338 const PetscInt *xadj,*adjncy; 2339 PetscInt n; 2340 PetscBool flg_row; 2341 2342 /* B_V = B_V - A_VR^T */ 2343 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2344 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2345 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2346 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2347 for (i=0;i<n;i++) { 2348 PetscInt j; 2349 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2350 } 2351 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2352 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2353 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2354 } 2355 2356 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2357 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2358 for (i=0;i<n_vertices;i++) { 2359 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2360 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2361 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2362 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2363 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2364 } 2365 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2366 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2367 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2368 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2369 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2370 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2371 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2372 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2373 } 2374 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2375 /* coarse basis functions */ 2376 for (i=0;i<pcbddc->local_primal_size;i++) { 2377 PetscScalar *y; 2378 2379 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2380 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2381 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2382 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2383 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2384 if (i<n_vertices) { 2385 y[n_B*i+idx_V_B[i]] = 1.0; 2386 } 2387 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2388 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2389 2390 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2391 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2392 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2393 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2394 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2395 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2396 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2397 } 2398 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2399 } 2400 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2401 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2402 } 2403 /* free memory */ 2404 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2405 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2406 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2407 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2408 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2409 ierr = PetscFree(work);CHKERRQ(ierr); 2410 if (n_vertices) { 2411 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2412 } 2413 if (n_constraints) { 2414 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2415 } 2416 /* Checking coarse_sub_mat and coarse basis functios */ 2417 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2418 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2419 if (pcbddc->dbg_flag) { 2420 Mat coarse_sub_mat; 2421 Mat AUXMAT,TM1,TM2,TM3,TM4; 2422 Mat coarse_phi_D,coarse_phi_B; 2423 Mat coarse_psi_D,coarse_psi_B; 2424 Mat A_II,A_BB,A_IB,A_BI; 2425 Mat C_B,CPHI; 2426 IS is_dummy; 2427 Vec mones; 2428 MatType checkmattype=MATSEQAIJ; 2429 PetscReal real_value; 2430 2431 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2432 Mat A; 2433 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2434 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2435 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2436 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2437 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2438 ierr = MatDestroy(&A);CHKERRQ(ierr); 2439 } else { 2440 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2441 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2442 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2443 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2444 } 2445 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2446 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2447 if (!pcbddc->symmetric_primal) { 2448 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2449 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2450 } 2451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2452 2453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2456 if (!pcbddc->symmetric_primal) { 2457 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2458 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2460 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2461 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2463 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2464 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2465 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2466 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2467 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2468 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2469 } else { 2470 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2471 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2472 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2473 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2475 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2476 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2477 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2478 } 2479 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2480 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2481 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2482 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2483 if (pcbddc->benign_n) { 2484 Mat B0_B,B0_BPHI; 2485 PetscScalar *data,*data2; 2486 PetscInt j; 2487 2488 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2489 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2490 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2491 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2492 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2493 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2494 for (j=0;j<pcbddc->benign_n;j++) { 2495 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2496 for (i=0;i<pcbddc->local_primal_size;i++) { 2497 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2498 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2499 } 2500 } 2501 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2502 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2503 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2504 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2505 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2506 } 2507 #if 0 2508 { 2509 PetscViewer viewer; 2510 char filename[256]; 2511 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2512 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2513 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2514 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2515 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2516 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2517 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2518 if (save_change) { 2519 Mat phi_B; 2520 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2521 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2522 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2523 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2524 } else { 2525 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2526 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2527 } 2528 if (pcbddc->coarse_phi_D) { 2529 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2530 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2531 } 2532 if (pcbddc->coarse_psi_B) { 2533 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2534 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2535 } 2536 if (pcbddc->coarse_psi_D) { 2537 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2538 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2539 } 2540 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2541 } 2542 #endif 2543 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2544 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2545 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2547 2548 /* check constraints */ 2549 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2551 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2552 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2553 } else { 2554 PetscScalar *data; 2555 Mat tmat; 2556 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2558 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2559 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2560 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2561 } 2562 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2563 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2564 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2565 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2567 if (!pcbddc->symmetric_primal) { 2568 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2569 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2570 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2571 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2573 } 2574 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2575 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2576 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2577 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2579 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2580 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2581 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2582 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2583 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2584 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2585 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2586 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2587 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2588 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2589 if (!pcbddc->symmetric_primal) { 2590 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2591 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2592 } 2593 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2594 } 2595 /* get back data */ 2596 *coarse_submat_vals_n = coarse_submat_vals; 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2602 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2603 { 2604 Mat *work_mat; 2605 IS isrow_s,iscol_s; 2606 PetscBool rsorted,csorted; 2607 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2608 PetscErrorCode ierr; 2609 2610 PetscFunctionBegin; 2611 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2612 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2613 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2615 2616 if (!rsorted) { 2617 const PetscInt *idxs; 2618 PetscInt *idxs_sorted,i; 2619 2620 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2621 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2622 for (i=0;i<rsize;i++) { 2623 idxs_perm_r[i] = i; 2624 } 2625 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2626 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2627 for (i=0;i<rsize;i++) { 2628 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2629 } 2630 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2632 } else { 2633 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2634 isrow_s = isrow; 2635 } 2636 2637 if (!csorted) { 2638 if (isrow == iscol) { 2639 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2640 iscol_s = isrow_s; 2641 } else { 2642 const PetscInt *idxs; 2643 PetscInt *idxs_sorted,i; 2644 2645 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2646 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2647 for (i=0;i<csize;i++) { 2648 idxs_perm_c[i] = i; 2649 } 2650 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2651 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2652 for (i=0;i<csize;i++) { 2653 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2654 } 2655 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2656 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2657 } 2658 } else { 2659 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2660 iscol_s = iscol; 2661 } 2662 2663 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2664 2665 if (!rsorted || !csorted) { 2666 Mat new_mat; 2667 IS is_perm_r,is_perm_c; 2668 2669 if (!rsorted) { 2670 PetscInt *idxs_r,i; 2671 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2672 for (i=0;i<rsize;i++) { 2673 idxs_r[idxs_perm_r[i]] = i; 2674 } 2675 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2676 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2677 } else { 2678 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2679 } 2680 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2681 2682 if (!csorted) { 2683 if (isrow_s == iscol_s) { 2684 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2685 is_perm_c = is_perm_r; 2686 } else { 2687 PetscInt *idxs_c,i; 2688 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2689 for (i=0;i<csize;i++) { 2690 idxs_c[idxs_perm_c[i]] = i; 2691 } 2692 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2693 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2694 } 2695 } else { 2696 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2697 } 2698 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2699 2700 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2701 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2702 work_mat[0] = new_mat; 2703 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2704 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2705 } 2706 2707 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2708 *B = work_mat[0]; 2709 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2710 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2711 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2717 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2718 { 2719 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2720 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2721 Mat new_mat; 2722 IS is_local,is_global; 2723 PetscInt local_size; 2724 PetscBool isseqaij; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2729 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2730 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2731 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2732 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2733 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2734 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2735 2736 /* check */ 2737 if (pcbddc->dbg_flag) { 2738 Vec x,x_change; 2739 PetscReal error; 2740 2741 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2742 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2743 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2744 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2746 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2747 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2748 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2749 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2750 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2753 ierr = VecDestroy(&x);CHKERRQ(ierr); 2754 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2755 } 2756 2757 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2758 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2759 if (isseqaij) { 2760 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2761 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2762 } else { 2763 Mat work_mat; 2764 2765 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2766 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2767 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2768 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2769 } 2770 if (matis->A->symmetric_set) { 2771 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2772 #if !defined(PETSC_USE_COMPLEX) 2773 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2774 #endif 2775 } 2776 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2782 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2783 { 2784 PC_IS* pcis = (PC_IS*)(pc->data); 2785 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2786 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2787 PetscInt *idx_R_local=NULL; 2788 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2789 PetscInt vbs,bs; 2790 PetscBT bitmask=NULL; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 /* 2795 No need to setup local scatters if 2796 - primal space is unchanged 2797 AND 2798 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2799 AND 2800 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2801 */ 2802 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2803 PetscFunctionReturn(0); 2804 } 2805 /* destroy old objects */ 2806 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2807 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2808 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2809 /* Set Non-overlapping dimensions */ 2810 n_B = pcis->n_B; 2811 n_D = pcis->n - n_B; 2812 n_vertices = pcbddc->n_vertices; 2813 2814 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2815 2816 /* create auxiliary bitmask and allocate workspace */ 2817 if (!sub_schurs->reuse_solver) { 2818 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2819 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2820 for (i=0;i<n_vertices;i++) { 2821 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2822 } 2823 2824 for (i=0, n_R=0; i<pcis->n; i++) { 2825 if (!PetscBTLookup(bitmask,i)) { 2826 idx_R_local[n_R++] = i; 2827 } 2828 } 2829 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2830 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2831 2832 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2833 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2834 } 2835 2836 /* Block code */ 2837 vbs = 1; 2838 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2839 if (bs>1 && !(n_vertices%bs)) { 2840 PetscBool is_blocked = PETSC_TRUE; 2841 PetscInt *vary; 2842 if (!sub_schurs->reuse_solver) { 2843 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2844 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2845 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2846 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2847 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2848 for (i=0; i<pcis->n/bs; i++) { 2849 if (vary[i]!=0 && vary[i]!=bs) { 2850 is_blocked = PETSC_FALSE; 2851 break; 2852 } 2853 } 2854 ierr = PetscFree(vary);CHKERRQ(ierr); 2855 } else { 2856 /* Verify directly the R set */ 2857 for (i=0; i<n_R/bs; i++) { 2858 PetscInt j,node=idx_R_local[bs*i]; 2859 for (j=1; j<bs; j++) { 2860 if (node != idx_R_local[bs*i+j]-j) { 2861 is_blocked = PETSC_FALSE; 2862 break; 2863 } 2864 } 2865 } 2866 } 2867 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2868 vbs = bs; 2869 for (i=0;i<n_R/vbs;i++) { 2870 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2871 } 2872 } 2873 } 2874 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2875 if (sub_schurs->reuse_solver) { 2876 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2877 2878 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2879 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2880 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2881 reuse_solver->is_R = pcbddc->is_R_local; 2882 } else { 2883 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2884 } 2885 2886 /* print some info if requested */ 2887 if (pcbddc->dbg_flag) { 2888 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2890 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2895 } 2896 2897 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2898 if (!sub_schurs->reuse_solver) { 2899 IS is_aux1,is_aux2; 2900 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2901 2902 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2903 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2905 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2906 for (i=0; i<n_D; i++) { 2907 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2908 } 2909 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2910 for (i=0, j=0; i<n_R; i++) { 2911 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2912 aux_array1[j++] = i; 2913 } 2914 } 2915 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2916 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2917 for (i=0, j=0; i<n_B; i++) { 2918 if (!PetscBTLookup(bitmask,is_indices[i])) { 2919 aux_array2[j++] = i; 2920 } 2921 } 2922 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2923 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2924 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2925 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2926 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2927 2928 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2929 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2930 for (i=0, j=0; i<n_R; i++) { 2931 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2932 aux_array1[j++] = i; 2933 } 2934 } 2935 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2936 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2937 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2938 } 2939 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2940 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2941 } else { 2942 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2943 IS tis; 2944 PetscInt schur_size; 2945 2946 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2947 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2948 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2949 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2951 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2952 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2953 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2954 } 2955 } 2956 PetscFunctionReturn(0); 2957 } 2958 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2962 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2963 { 2964 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2965 PC_IS *pcis = (PC_IS*)pc->data; 2966 PC pc_temp; 2967 Mat A_RR; 2968 MatReuse reuse; 2969 PetscScalar m_one = -1.0; 2970 PetscReal value; 2971 PetscInt n_D,n_R; 2972 PetscBool check_corr[2],issbaij; 2973 PetscErrorCode ierr; 2974 /* prefixes stuff */ 2975 char dir_prefix[256],neu_prefix[256],str_level[16]; 2976 size_t len; 2977 2978 PetscFunctionBegin; 2979 2980 /* compute prefixes */ 2981 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2982 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2983 if (!pcbddc->current_level) { 2984 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2985 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2986 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2987 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2988 } else { 2989 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2990 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2991 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2992 len -= 15; /* remove "pc_bddc_coarse_" */ 2993 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2994 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2995 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2996 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2997 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2998 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2999 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3000 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3001 } 3002 3003 /* DIRICHLET PROBLEM */ 3004 if (dirichlet) { 3005 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3006 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3007 if (!sub_schurs->reuse_solver) { 3008 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3009 } 3010 if (pcbddc->dbg_flag) { 3011 Mat A_IIn; 3012 3013 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3014 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3015 pcis->A_II = A_IIn; 3016 } 3017 } 3018 if (pcbddc->local_mat->symmetric_set) { 3019 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3020 } 3021 /* Matrix for Dirichlet problem is pcis->A_II */ 3022 n_D = pcis->n - pcis->n_B; 3023 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3024 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3025 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3026 /* default */ 3027 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3028 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3029 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3030 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3031 if (issbaij) { 3032 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3033 } else { 3034 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3035 } 3036 /* Allow user's customization */ 3037 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3038 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3039 } 3040 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3041 if (sub_schurs->reuse_solver) { 3042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3043 3044 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3045 } 3046 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3047 if (!n_D) { 3048 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3049 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3050 } 3051 /* Set Up KSP for Dirichlet problem of BDDC */ 3052 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3053 /* set ksp_D into pcis data */ 3054 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3055 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3056 pcis->ksp_D = pcbddc->ksp_D; 3057 } 3058 3059 /* NEUMANN PROBLEM */ 3060 A_RR = 0; 3061 if (neumann) { 3062 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3063 PetscInt ibs,mbs; 3064 PetscBool issbaij; 3065 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3066 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3067 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3068 if (pcbddc->ksp_R) { /* already created ksp */ 3069 PetscInt nn_R; 3070 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3071 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3072 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3073 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3074 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3075 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3076 reuse = MAT_INITIAL_MATRIX; 3077 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3078 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3079 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3080 reuse = MAT_INITIAL_MATRIX; 3081 } else { /* safe to reuse the matrix */ 3082 reuse = MAT_REUSE_MATRIX; 3083 } 3084 } 3085 /* last check */ 3086 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3088 reuse = MAT_INITIAL_MATRIX; 3089 } 3090 } else { /* first time, so we need to create the matrix */ 3091 reuse = MAT_INITIAL_MATRIX; 3092 } 3093 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3094 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3095 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3096 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3097 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3098 if (matis->A == pcbddc->local_mat) { 3099 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3100 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3101 } else { 3102 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3103 } 3104 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3105 if (matis->A == pcbddc->local_mat) { 3106 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3107 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3108 } else { 3109 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3110 } 3111 } 3112 /* extract A_RR */ 3113 if (sub_schurs->reuse_solver) { 3114 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3115 3116 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3117 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3118 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3119 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3120 } else { 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3122 } 3123 } else { 3124 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3125 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3126 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3127 } 3128 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3129 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3130 } 3131 if (pcbddc->local_mat->symmetric_set) { 3132 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3133 } 3134 if (!pcbddc->ksp_R) { /* create object if not present */ 3135 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3137 /* default */ 3138 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3139 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3140 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3141 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3142 if (issbaij) { 3143 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3144 } else { 3145 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3146 } 3147 /* Allow user's customization */ 3148 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3149 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3150 } 3151 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3152 if (!n_R) { 3153 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3154 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3155 } 3156 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3157 /* Reuse solver if it is present */ 3158 if (sub_schurs->reuse_solver) { 3159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3160 3161 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3162 } 3163 /* Set Up KSP for Neumann problem of BDDC */ 3164 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3165 } 3166 3167 if (pcbddc->dbg_flag) { 3168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3169 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3171 } 3172 3173 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3174 check_corr[0] = check_corr[1] = PETSC_FALSE; 3175 if (pcbddc->NullSpace_corr[0]) { 3176 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3177 } 3178 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3179 check_corr[0] = PETSC_TRUE; 3180 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3181 } 3182 if (neumann && pcbddc->NullSpace_corr[2]) { 3183 check_corr[1] = PETSC_TRUE; 3184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3185 } 3186 3187 /* check Dirichlet and Neumann solvers */ 3188 if (pcbddc->dbg_flag) { 3189 if (dirichlet) { /* Dirichlet */ 3190 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3191 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3192 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3193 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3194 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3196 if (check_corr[0]) { 3197 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3198 } 3199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3200 } 3201 if (neumann) { /* Neumann */ 3202 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3203 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3204 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3205 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3206 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3208 if (check_corr[1]) { 3209 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3210 } 3211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3212 } 3213 } 3214 /* free Neumann problem's matrix */ 3215 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3221 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3222 { 3223 PetscErrorCode ierr; 3224 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3225 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3226 3227 PetscFunctionBegin; 3228 if (!sub_schurs->reuse_solver) { 3229 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3230 } 3231 if (!pcbddc->switch_static) { 3232 if (applytranspose && pcbddc->local_auxmat1) { 3233 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3234 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3235 } 3236 if (!sub_schurs->reuse_solver) { 3237 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 } else { 3240 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3241 3242 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 } 3245 } else { 3246 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3247 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3248 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3249 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3250 if (applytranspose && pcbddc->local_auxmat1) { 3251 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3252 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3253 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3254 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3255 } 3256 } 3257 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3258 if (applytranspose) { 3259 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3260 } else { 3261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3262 } 3263 } else { 3264 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3265 3266 if (applytranspose) { 3267 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3268 } else { 3269 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3270 } 3271 } 3272 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3273 if (!pcbddc->switch_static) { 3274 if (!sub_schurs->reuse_solver) { 3275 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3276 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3277 } else { 3278 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3279 3280 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3281 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3282 } 3283 if (!applytranspose && pcbddc->local_auxmat1) { 3284 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3285 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3286 } 3287 } else { 3288 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3289 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3290 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3291 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3292 if (!applytranspose && pcbddc->local_auxmat1) { 3293 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3294 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3295 } 3296 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3297 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3298 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3299 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3300 } 3301 PetscFunctionReturn(0); 3302 } 3303 3304 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3305 #undef __FUNCT__ 3306 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3307 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3308 { 3309 PetscErrorCode ierr; 3310 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3311 PC_IS* pcis = (PC_IS*) (pc->data); 3312 const PetscScalar zero = 0.0; 3313 3314 PetscFunctionBegin; 3315 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3316 if (!pcbddc->benign_apply_coarse_only) { 3317 if (applytranspose) { 3318 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3319 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3320 } else { 3321 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3322 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3323 } 3324 } else { 3325 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3326 } 3327 3328 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3329 if (pcbddc->benign_n) { 3330 PetscScalar *array; 3331 PetscInt j; 3332 3333 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3334 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3335 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3336 } 3337 3338 /* start communications from local primal nodes to rhs of coarse solver */ 3339 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3340 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3341 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 3343 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3344 if (pcbddc->coarse_ksp) { 3345 Mat coarse_mat; 3346 Vec rhs,sol; 3347 MatNullSpace nullsp; 3348 PetscBool isbddc = PETSC_FALSE; 3349 3350 if (pcbddc->benign_have_null) { 3351 PC coarse_pc; 3352 3353 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3354 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3355 /* we need to propagate to coarser levels the need for a possible benign correction */ 3356 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3357 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3358 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3359 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3360 } 3361 } 3362 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3363 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3364 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3365 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3366 if (nullsp) { 3367 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3368 } 3369 if (applytranspose) { 3370 if (pcbddc->benign_apply_coarse_only) { 3371 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3372 } else { 3373 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3374 } 3375 } else { 3376 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3377 PC coarse_pc; 3378 3379 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3380 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3381 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3382 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3383 } else { 3384 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3385 } 3386 } 3387 /* we don't need the benign correction at coarser levels anymore */ 3388 if (pcbddc->benign_have_null && isbddc) { 3389 PC coarse_pc; 3390 PC_BDDC* coarsepcbddc; 3391 3392 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3393 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3394 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3395 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3396 } 3397 if (nullsp) { 3398 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3399 } 3400 } 3401 3402 /* Local solution on R nodes */ 3403 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3404 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3405 } 3406 /* communications from coarse sol to local primal nodes */ 3407 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3408 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3409 3410 /* Sum contributions from the two levels */ 3411 if (!pcbddc->benign_apply_coarse_only) { 3412 if (applytranspose) { 3413 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3414 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3415 } else { 3416 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3417 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3418 } 3419 /* store p0 */ 3420 if (pcbddc->benign_n) { 3421 PetscScalar *array; 3422 PetscInt j; 3423 3424 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3425 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3426 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3427 } 3428 } else { /* expand the coarse solution */ 3429 if (applytranspose) { 3430 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3431 } else { 3432 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3433 } 3434 } 3435 PetscFunctionReturn(0); 3436 } 3437 3438 #undef __FUNCT__ 3439 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3440 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3441 { 3442 PetscErrorCode ierr; 3443 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3444 PetscScalar *array; 3445 Vec from,to; 3446 3447 PetscFunctionBegin; 3448 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3449 from = pcbddc->coarse_vec; 3450 to = pcbddc->vec1_P; 3451 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3452 Vec tvec; 3453 3454 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3455 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3456 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3457 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3458 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3459 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3460 } 3461 } else { /* from local to global -> put data in coarse right hand side */ 3462 from = pcbddc->vec1_P; 3463 to = pcbddc->coarse_vec; 3464 } 3465 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 #undef __FUNCT__ 3470 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3471 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3472 { 3473 PetscErrorCode ierr; 3474 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3475 PetscScalar *array; 3476 Vec from,to; 3477 3478 PetscFunctionBegin; 3479 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3480 from = pcbddc->coarse_vec; 3481 to = pcbddc->vec1_P; 3482 } else { /* from local to global -> put data in coarse right hand side */ 3483 from = pcbddc->vec1_P; 3484 to = pcbddc->coarse_vec; 3485 } 3486 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3487 if (smode == SCATTER_FORWARD) { 3488 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3489 Vec tvec; 3490 3491 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3492 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3493 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3494 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3495 } 3496 } else { 3497 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3498 ierr = VecResetArray(from);CHKERRQ(ierr); 3499 } 3500 } 3501 PetscFunctionReturn(0); 3502 } 3503 3504 /* uncomment for testing purposes */ 3505 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3508 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3509 { 3510 PetscErrorCode ierr; 3511 PC_IS* pcis = (PC_IS*)(pc->data); 3512 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3513 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3514 /* one and zero */ 3515 PetscScalar one=1.0,zero=0.0; 3516 /* space to store constraints and their local indices */ 3517 PetscScalar *constraints_data; 3518 PetscInt *constraints_idxs,*constraints_idxs_B; 3519 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3520 PetscInt *constraints_n; 3521 /* iterators */ 3522 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3523 /* BLAS integers */ 3524 PetscBLASInt lwork,lierr; 3525 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3526 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3527 /* reuse */ 3528 PetscInt olocal_primal_size,olocal_primal_size_cc; 3529 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3530 /* change of basis */ 3531 PetscBool qr_needed; 3532 PetscBT change_basis,qr_needed_idx; 3533 /* auxiliary stuff */ 3534 PetscInt *nnz,*is_indices; 3535 PetscInt ncc; 3536 /* some quantities */ 3537 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3538 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3539 3540 PetscFunctionBegin; 3541 /* Destroy Mat objects computed previously */ 3542 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3543 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3544 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3545 /* save info on constraints from previous setup (if any) */ 3546 olocal_primal_size = pcbddc->local_primal_size; 3547 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3548 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3549 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3550 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3551 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3552 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3553 3554 if (!pcbddc->adaptive_selection) { 3555 IS ISForVertices,*ISForFaces,*ISForEdges; 3556 MatNullSpace nearnullsp; 3557 const Vec *nearnullvecs; 3558 Vec *localnearnullsp; 3559 PetscScalar *array; 3560 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3561 PetscBool nnsp_has_cnst; 3562 /* LAPACK working arrays for SVD or POD */ 3563 PetscBool skip_lapack,boolforchange; 3564 PetscScalar *work; 3565 PetscReal *singular_vals; 3566 #if defined(PETSC_USE_COMPLEX) 3567 PetscReal *rwork; 3568 #endif 3569 #if defined(PETSC_MISSING_LAPACK_GESVD) 3570 PetscScalar *temp_basis,*correlation_mat; 3571 #else 3572 PetscBLASInt dummy_int=1; 3573 PetscScalar dummy_scalar=1.; 3574 #endif 3575 3576 /* Get index sets for faces, edges and vertices from graph */ 3577 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3578 /* print some info */ 3579 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3580 PetscInt nv; 3581 3582 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3583 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3584 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3590 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3591 } 3592 3593 /* free unneeded index sets */ 3594 if (!pcbddc->use_vertices) { 3595 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3596 } 3597 if (!pcbddc->use_edges) { 3598 for (i=0;i<n_ISForEdges;i++) { 3599 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3600 } 3601 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3602 n_ISForEdges = 0; 3603 } 3604 if (!pcbddc->use_faces) { 3605 for (i=0;i<n_ISForFaces;i++) { 3606 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3607 } 3608 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3609 n_ISForFaces = 0; 3610 } 3611 3612 /* check if near null space is attached to global mat */ 3613 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3614 if (nearnullsp) { 3615 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3616 /* remove any stored info */ 3617 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3618 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3619 /* store information for BDDC solver reuse */ 3620 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3621 pcbddc->onearnullspace = nearnullsp; 3622 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3623 for (i=0;i<nnsp_size;i++) { 3624 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3625 } 3626 } else { /* if near null space is not provided BDDC uses constants by default */ 3627 nnsp_size = 0; 3628 nnsp_has_cnst = PETSC_TRUE; 3629 } 3630 /* get max number of constraints on a single cc */ 3631 max_constraints = nnsp_size; 3632 if (nnsp_has_cnst) max_constraints++; 3633 3634 /* 3635 Evaluate maximum storage size needed by the procedure 3636 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3637 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3638 There can be multiple constraints per connected component 3639 */ 3640 n_vertices = 0; 3641 if (ISForVertices) { 3642 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3643 } 3644 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3645 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3646 3647 total_counts = n_ISForFaces+n_ISForEdges; 3648 total_counts *= max_constraints; 3649 total_counts += n_vertices; 3650 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3651 3652 total_counts = 0; 3653 max_size_of_constraint = 0; 3654 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3655 IS used_is; 3656 if (i<n_ISForEdges) { 3657 used_is = ISForEdges[i]; 3658 } else { 3659 used_is = ISForFaces[i-n_ISForEdges]; 3660 } 3661 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3662 total_counts += j; 3663 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3664 } 3665 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3666 3667 /* get local part of global near null space vectors */ 3668 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3669 for (k=0;k<nnsp_size;k++) { 3670 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3671 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3672 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3673 } 3674 3675 /* whether or not to skip lapack calls */ 3676 skip_lapack = PETSC_TRUE; 3677 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3678 3679 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3680 if (!skip_lapack) { 3681 PetscScalar temp_work; 3682 3683 #if defined(PETSC_MISSING_LAPACK_GESVD) 3684 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3685 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3686 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3687 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3688 #if defined(PETSC_USE_COMPLEX) 3689 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3690 #endif 3691 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3692 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3693 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3694 lwork = -1; 3695 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3696 #if !defined(PETSC_USE_COMPLEX) 3697 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3698 #else 3699 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3700 #endif 3701 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3702 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3703 #else /* on missing GESVD */ 3704 /* SVD */ 3705 PetscInt max_n,min_n; 3706 max_n = max_size_of_constraint; 3707 min_n = max_constraints; 3708 if (max_size_of_constraint < max_constraints) { 3709 min_n = max_size_of_constraint; 3710 max_n = max_constraints; 3711 } 3712 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3713 #if defined(PETSC_USE_COMPLEX) 3714 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3715 #endif 3716 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3717 lwork = -1; 3718 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3719 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3720 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3721 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3722 #if !defined(PETSC_USE_COMPLEX) 3723 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3724 #else 3725 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3726 #endif 3727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3729 #endif /* on missing GESVD */ 3730 /* Allocate optimal workspace */ 3731 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3732 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3733 } 3734 /* Now we can loop on constraining sets */ 3735 total_counts = 0; 3736 constraints_idxs_ptr[0] = 0; 3737 constraints_data_ptr[0] = 0; 3738 /* vertices */ 3739 if (n_vertices) { 3740 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3741 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3742 for (i=0;i<n_vertices;i++) { 3743 constraints_n[total_counts] = 1; 3744 constraints_data[total_counts] = 1.0; 3745 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3746 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3747 total_counts++; 3748 } 3749 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3750 n_vertices = total_counts; 3751 } 3752 3753 /* edges and faces */ 3754 total_counts_cc = total_counts; 3755 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3756 IS used_is; 3757 PetscBool idxs_copied = PETSC_FALSE; 3758 3759 if (ncc<n_ISForEdges) { 3760 used_is = ISForEdges[ncc]; 3761 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3762 } else { 3763 used_is = ISForFaces[ncc-n_ISForEdges]; 3764 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3765 } 3766 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3767 3768 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3769 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3770 /* change of basis should not be performed on local periodic nodes */ 3771 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3772 if (nnsp_has_cnst) { 3773 PetscScalar quad_value; 3774 3775 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3776 idxs_copied = PETSC_TRUE; 3777 3778 if (!pcbddc->use_nnsp_true) { 3779 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3780 } else { 3781 quad_value = 1.0; 3782 } 3783 for (j=0;j<size_of_constraint;j++) { 3784 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3785 } 3786 temp_constraints++; 3787 total_counts++; 3788 } 3789 for (k=0;k<nnsp_size;k++) { 3790 PetscReal real_value; 3791 PetscScalar *ptr_to_data; 3792 3793 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3794 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3795 for (j=0;j<size_of_constraint;j++) { 3796 ptr_to_data[j] = array[is_indices[j]]; 3797 } 3798 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3799 /* check if array is null on the connected component */ 3800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3801 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3802 if (real_value > 0.0) { /* keep indices and values */ 3803 temp_constraints++; 3804 total_counts++; 3805 if (!idxs_copied) { 3806 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3807 idxs_copied = PETSC_TRUE; 3808 } 3809 } 3810 } 3811 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3812 valid_constraints = temp_constraints; 3813 if (!pcbddc->use_nnsp_true && temp_constraints) { 3814 if (temp_constraints == 1) { /* just normalize the constraint */ 3815 PetscScalar norm,*ptr_to_data; 3816 3817 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3819 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3820 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3821 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3822 } else { /* perform SVD */ 3823 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3824 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3825 3826 #if defined(PETSC_MISSING_LAPACK_GESVD) 3827 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3828 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3829 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3830 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3831 from that computed using LAPACKgesvd 3832 -> This is due to a different computation of eigenvectors in LAPACKheev 3833 -> The quality of the POD-computed basis will be the same */ 3834 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3835 /* Store upper triangular part of correlation matrix */ 3836 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3837 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3838 for (j=0;j<temp_constraints;j++) { 3839 for (k=0;k<j+1;k++) { 3840 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3841 } 3842 } 3843 /* compute eigenvalues and eigenvectors of correlation matrix */ 3844 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3845 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3846 #if !defined(PETSC_USE_COMPLEX) 3847 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3848 #else 3849 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3850 #endif 3851 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3852 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3853 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3854 j = 0; 3855 while (j < temp_constraints && singular_vals[j] < tol) j++; 3856 total_counts = total_counts-j; 3857 valid_constraints = temp_constraints-j; 3858 /* scale and copy POD basis into used quadrature memory */ 3859 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3860 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3861 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3863 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3865 if (j<temp_constraints) { 3866 PetscInt ii; 3867 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3868 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3869 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3870 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3871 for (k=0;k<temp_constraints-j;k++) { 3872 for (ii=0;ii<size_of_constraint;ii++) { 3873 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3874 } 3875 } 3876 } 3877 #else /* on missing GESVD */ 3878 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3879 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3881 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3882 #if !defined(PETSC_USE_COMPLEX) 3883 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3884 #else 3885 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3886 #endif 3887 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3888 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3889 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3890 k = temp_constraints; 3891 if (k > size_of_constraint) k = size_of_constraint; 3892 j = 0; 3893 while (j < k && singular_vals[k-j-1] < tol) j++; 3894 valid_constraints = k-j; 3895 total_counts = total_counts-temp_constraints+valid_constraints; 3896 #endif /* on missing GESVD */ 3897 } 3898 } 3899 /* update pointers information */ 3900 if (valid_constraints) { 3901 constraints_n[total_counts_cc] = valid_constraints; 3902 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3903 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3904 /* set change_of_basis flag */ 3905 if (boolforchange) { 3906 PetscBTSet(change_basis,total_counts_cc); 3907 } 3908 total_counts_cc++; 3909 } 3910 } 3911 /* free workspace */ 3912 if (!skip_lapack) { 3913 ierr = PetscFree(work);CHKERRQ(ierr); 3914 #if defined(PETSC_USE_COMPLEX) 3915 ierr = PetscFree(rwork);CHKERRQ(ierr); 3916 #endif 3917 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3918 #if defined(PETSC_MISSING_LAPACK_GESVD) 3919 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3920 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3921 #endif 3922 } 3923 for (k=0;k<nnsp_size;k++) { 3924 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3925 } 3926 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3927 /* free index sets of faces, edges and vertices */ 3928 for (i=0;i<n_ISForFaces;i++) { 3929 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3930 } 3931 if (n_ISForFaces) { 3932 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3933 } 3934 for (i=0;i<n_ISForEdges;i++) { 3935 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3936 } 3937 if (n_ISForEdges) { 3938 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3939 } 3940 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3941 } else { 3942 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3943 3944 total_counts = 0; 3945 n_vertices = 0; 3946 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3947 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3948 } 3949 max_constraints = 0; 3950 total_counts_cc = 0; 3951 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3952 total_counts += pcbddc->adaptive_constraints_n[i]; 3953 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3954 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3955 } 3956 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3957 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3958 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3959 constraints_data = pcbddc->adaptive_constraints_data; 3960 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3961 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3962 total_counts_cc = 0; 3963 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3964 if (pcbddc->adaptive_constraints_n[i]) { 3965 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3966 } 3967 } 3968 #if 0 3969 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3970 for (i=0;i<total_counts_cc;i++) { 3971 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3972 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3973 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3974 printf(" %d",constraints_idxs[j]); 3975 } 3976 printf("\n"); 3977 printf("number of cc: %d\n",constraints_n[i]); 3978 } 3979 for (i=0;i<n_vertices;i++) { 3980 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3981 } 3982 for (i=0;i<sub_schurs->n_subs;i++) { 3983 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3984 } 3985 #endif 3986 3987 max_size_of_constraint = 0; 3988 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3989 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3990 /* Change of basis */ 3991 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3992 if (pcbddc->use_change_of_basis) { 3993 for (i=0;i<sub_schurs->n_subs;i++) { 3994 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3995 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3996 } 3997 } 3998 } 3999 } 4000 pcbddc->local_primal_size = total_counts; 4001 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4002 4003 /* map constraints_idxs in boundary numbering */ 4004 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4005 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4006 4007 /* Create constraint matrix */ 4008 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4009 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4010 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4011 4012 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4013 /* determine if a QR strategy is needed for change of basis */ 4014 qr_needed = PETSC_FALSE; 4015 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4016 total_primal_vertices=0; 4017 pcbddc->local_primal_size_cc = 0; 4018 for (i=0;i<total_counts_cc;i++) { 4019 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4020 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4021 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4022 pcbddc->local_primal_size_cc += 1; 4023 } else if (PetscBTLookup(change_basis,i)) { 4024 for (k=0;k<constraints_n[i];k++) { 4025 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4026 } 4027 pcbddc->local_primal_size_cc += constraints_n[i]; 4028 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4029 PetscBTSet(qr_needed_idx,i); 4030 qr_needed = PETSC_TRUE; 4031 } 4032 } else { 4033 pcbddc->local_primal_size_cc += 1; 4034 } 4035 } 4036 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4037 pcbddc->n_vertices = total_primal_vertices; 4038 /* permute indices in order to have a sorted set of vertices */ 4039 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4040 4041 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4042 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4043 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4044 4045 /* nonzero structure of constraint matrix */ 4046 /* and get reference dof for local constraints */ 4047 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4048 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4049 4050 j = total_primal_vertices; 4051 total_counts = total_primal_vertices; 4052 cum = total_primal_vertices; 4053 for (i=n_vertices;i<total_counts_cc;i++) { 4054 if (!PetscBTLookup(change_basis,i)) { 4055 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4056 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4057 cum++; 4058 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4059 for (k=0;k<constraints_n[i];k++) { 4060 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4061 nnz[j+k] = size_of_constraint; 4062 } 4063 j += constraints_n[i]; 4064 } 4065 } 4066 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4067 ierr = PetscFree(nnz);CHKERRQ(ierr); 4068 4069 /* set values in constraint matrix */ 4070 for (i=0;i<total_primal_vertices;i++) { 4071 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4072 } 4073 total_counts = total_primal_vertices; 4074 for (i=n_vertices;i<total_counts_cc;i++) { 4075 if (!PetscBTLookup(change_basis,i)) { 4076 PetscInt *cols; 4077 4078 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4079 cols = constraints_idxs+constraints_idxs_ptr[i]; 4080 for (k=0;k<constraints_n[i];k++) { 4081 PetscInt row = total_counts+k; 4082 PetscScalar *vals; 4083 4084 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4085 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4086 } 4087 total_counts += constraints_n[i]; 4088 } 4089 } 4090 /* assembling */ 4091 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4092 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4093 4094 /* 4095 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4096 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4097 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4098 */ 4099 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4100 if (pcbddc->use_change_of_basis) { 4101 /* dual and primal dofs on a single cc */ 4102 PetscInt dual_dofs,primal_dofs; 4103 /* working stuff for GEQRF */ 4104 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4105 PetscBLASInt lqr_work; 4106 /* working stuff for UNGQR */ 4107 PetscScalar *gqr_work,lgqr_work_t; 4108 PetscBLASInt lgqr_work; 4109 /* working stuff for TRTRS */ 4110 PetscScalar *trs_rhs; 4111 PetscBLASInt Blas_NRHS; 4112 /* pointers for values insertion into change of basis matrix */ 4113 PetscInt *start_rows,*start_cols; 4114 PetscScalar *start_vals; 4115 /* working stuff for values insertion */ 4116 PetscBT is_primal; 4117 PetscInt *aux_primal_numbering_B; 4118 /* matrix sizes */ 4119 PetscInt global_size,local_size; 4120 /* temporary change of basis */ 4121 Mat localChangeOfBasisMatrix; 4122 /* extra space for debugging */ 4123 PetscScalar *dbg_work; 4124 4125 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4126 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4127 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4128 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4129 /* nonzeros for local mat */ 4130 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4131 if (!pcbddc->benign_change || pcbddc->fake_change) { 4132 for (i=0;i<pcis->n;i++) nnz[i]=1; 4133 } else { 4134 const PetscInt *ii; 4135 PetscInt n; 4136 PetscBool flg_row; 4137 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4138 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4139 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4140 } 4141 for (i=n_vertices;i<total_counts_cc;i++) { 4142 if (PetscBTLookup(change_basis,i)) { 4143 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4144 if (PetscBTLookup(qr_needed_idx,i)) { 4145 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4146 } else { 4147 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4148 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4149 } 4150 } 4151 } 4152 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4153 ierr = PetscFree(nnz);CHKERRQ(ierr); 4154 /* Set interior change in the matrix */ 4155 if (!pcbddc->benign_change || pcbddc->fake_change) { 4156 for (i=0;i<pcis->n;i++) { 4157 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4158 } 4159 } else { 4160 const PetscInt *ii,*jj; 4161 PetscScalar *aa; 4162 PetscInt n; 4163 PetscBool flg_row; 4164 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4165 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4166 for (i=0;i<n;i++) { 4167 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4168 } 4169 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4170 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4171 } 4172 4173 if (pcbddc->dbg_flag) { 4174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4175 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4176 } 4177 4178 4179 /* Now we loop on the constraints which need a change of basis */ 4180 /* 4181 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4182 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4183 4184 Basic blocks of change of basis matrix T computed by 4185 4186 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4187 4188 | 1 0 ... 0 s_1/S | 4189 | 0 1 ... 0 s_2/S | 4190 | ... | 4191 | 0 ... 1 s_{n-1}/S | 4192 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4193 4194 with S = \sum_{i=1}^n s_i^2 4195 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4196 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4197 4198 - QR decomposition of constraints otherwise 4199 */ 4200 if (qr_needed) { 4201 /* space to store Q */ 4202 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4203 /* first we issue queries for optimal work */ 4204 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4205 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4206 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4207 lqr_work = -1; 4208 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4209 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4210 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4211 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4212 lgqr_work = -1; 4213 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4214 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4215 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4216 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4217 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4218 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4219 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4220 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4221 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4222 /* array to store scaling factors for reflectors */ 4223 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4224 /* array to store rhs and solution of triangular solver */ 4225 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4226 /* allocating workspace for check */ 4227 if (pcbddc->dbg_flag) { 4228 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4229 } 4230 } 4231 /* array to store whether a node is primal or not */ 4232 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4233 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4234 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4235 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4236 for (i=0;i<total_primal_vertices;i++) { 4237 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4238 } 4239 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4240 4241 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4242 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4243 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4244 if (PetscBTLookup(change_basis,total_counts)) { 4245 /* get constraint info */ 4246 primal_dofs = constraints_n[total_counts]; 4247 dual_dofs = size_of_constraint-primal_dofs; 4248 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4251 } 4252 4253 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4254 4255 /* copy quadrature constraints for change of basis check */ 4256 if (pcbddc->dbg_flag) { 4257 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4258 } 4259 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4260 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4261 4262 /* compute QR decomposition of constraints */ 4263 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4264 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4266 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4267 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4268 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4269 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4270 4271 /* explictly compute R^-T */ 4272 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4273 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4274 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4275 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4276 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4277 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4279 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4280 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4282 4283 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4286 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4288 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4289 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4290 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4291 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4292 4293 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4294 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4295 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4296 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4300 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4303 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4304 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4305 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4306 4307 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4308 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4309 /* insert cols for primal dofs */ 4310 for (j=0;j<primal_dofs;j++) { 4311 start_vals = &qr_basis[j*size_of_constraint]; 4312 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4313 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4314 } 4315 /* insert cols for dual dofs */ 4316 for (j=0,k=0;j<dual_dofs;k++) { 4317 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4318 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4319 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4320 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4321 j++; 4322 } 4323 } 4324 4325 /* check change of basis */ 4326 if (pcbddc->dbg_flag) { 4327 PetscInt ii,jj; 4328 PetscBool valid_qr=PETSC_TRUE; 4329 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4334 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4335 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4336 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4337 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4338 for (jj=0;jj<size_of_constraint;jj++) { 4339 for (ii=0;ii<primal_dofs;ii++) { 4340 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4341 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4342 } 4343 } 4344 if (!valid_qr) { 4345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4346 for (jj=0;jj<size_of_constraint;jj++) { 4347 for (ii=0;ii<primal_dofs;ii++) { 4348 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4349 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4350 } 4351 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4352 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4353 } 4354 } 4355 } 4356 } else { 4357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4358 } 4359 } 4360 } else { /* simple transformation block */ 4361 PetscInt row,col; 4362 PetscScalar val,norm; 4363 4364 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4365 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4366 for (j=0;j<size_of_constraint;j++) { 4367 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4368 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4369 if (!PetscBTLookup(is_primal,row_B)) { 4370 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4371 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4372 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4373 } else { 4374 for (k=0;k<size_of_constraint;k++) { 4375 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4376 if (row != col) { 4377 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4378 } else { 4379 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4380 } 4381 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4382 } 4383 } 4384 } 4385 if (pcbddc->dbg_flag) { 4386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4387 } 4388 } 4389 } else { 4390 if (pcbddc->dbg_flag) { 4391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4392 } 4393 } 4394 } 4395 4396 /* free workspace */ 4397 if (qr_needed) { 4398 if (pcbddc->dbg_flag) { 4399 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4400 } 4401 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4402 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4403 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4404 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4405 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4406 } 4407 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4408 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4409 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4410 4411 /* assembling of global change of variable */ 4412 if (!pcbddc->fake_change) { 4413 Mat tmat; 4414 PetscInt bs; 4415 4416 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4417 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4418 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4419 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4420 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4421 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4422 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4423 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4424 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4425 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4426 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4427 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4428 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4429 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4430 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4431 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4432 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4433 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4434 4435 /* check */ 4436 if (pcbddc->dbg_flag) { 4437 PetscReal error; 4438 Vec x,x_change; 4439 4440 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4441 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4442 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4443 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4444 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4445 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4446 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4447 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4450 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4451 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4452 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4454 ierr = VecDestroy(&x);CHKERRQ(ierr); 4455 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4456 } 4457 /* adapt sub_schurs computed (if any) */ 4458 if (pcbddc->use_deluxe_scaling) { 4459 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4460 4461 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4462 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4463 } 4464 if (sub_schurs->S_Ej_all) { 4465 Mat S_new,tmat; 4466 IS is_all_N,is_V_Sall = NULL; 4467 4468 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4469 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4470 if (pcbddc->deluxe_zerorows) { 4471 ISLocalToGlobalMapping NtoSall; 4472 IS is_V; 4473 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4474 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4475 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4476 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4477 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4478 } 4479 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4480 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4481 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4482 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4483 if (pcbddc->deluxe_zerorows) { 4484 const PetscScalar *array; 4485 const PetscInt *idxs_V,*idxs_all; 4486 PetscInt i,n_V; 4487 4488 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4489 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4490 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4491 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4492 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4493 for (i=0;i<n_V;i++) { 4494 PetscScalar val; 4495 PetscInt idx; 4496 4497 idx = idxs_V[i]; 4498 val = array[idxs_all[idxs_V[i]]]; 4499 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4500 } 4501 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4502 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4503 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4504 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4505 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4506 } 4507 sub_schurs->S_Ej_all = S_new; 4508 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4509 if (sub_schurs->sum_S_Ej_all) { 4510 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4511 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4512 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4513 if (pcbddc->deluxe_zerorows) { 4514 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4515 } 4516 sub_schurs->sum_S_Ej_all = S_new; 4517 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4518 } 4519 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4520 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4521 } 4522 /* destroy any change of basis context in sub_schurs */ 4523 if (sub_schurs->change) { 4524 PetscInt i; 4525 4526 for (i=0;i<sub_schurs->n_subs;i++) { 4527 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4528 } 4529 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4530 } 4531 } 4532 if (pcbddc->switch_static) { /* need to save the local change */ 4533 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4534 } else { 4535 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4536 } 4537 /* determine if any process has changed the pressures locally */ 4538 pcbddc->change_interior = pcbddc->benign_have_null; 4539 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4540 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4541 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4542 pcbddc->use_qr_single = qr_needed; 4543 } 4544 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4545 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4546 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4547 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4548 } else { 4549 Mat benign_global = NULL; 4550 if (pcbddc->benign_have_null) { 4551 Mat tmat; 4552 4553 pcbddc->change_interior = PETSC_TRUE; 4554 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4555 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4556 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4558 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4559 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4560 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4561 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4562 if (pcbddc->benign_change) { 4563 Mat M; 4564 4565 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4566 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4567 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4568 ierr = MatDestroy(&M);CHKERRQ(ierr); 4569 } else { 4570 Mat eye; 4571 PetscScalar *array; 4572 4573 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4574 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4575 for (i=0;i<pcis->n;i++) { 4576 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4577 } 4578 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4579 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4580 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4581 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4582 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4583 } 4584 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4585 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4586 } 4587 if (pcbddc->user_ChangeOfBasisMatrix) { 4588 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4589 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4590 } else if (pcbddc->benign_have_null) { 4591 pcbddc->ChangeOfBasisMatrix = benign_global; 4592 } 4593 } 4594 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4595 IS is_global; 4596 const PetscInt *gidxs; 4597 4598 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4600 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4601 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4602 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4603 } 4604 } 4605 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4606 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4607 } 4608 4609 if (!pcbddc->fake_change) { 4610 /* add pressure dofs to set of primal nodes for numbering purposes */ 4611 for (i=0;i<pcbddc->benign_n;i++) { 4612 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4613 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4614 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4615 pcbddc->local_primal_size_cc++; 4616 pcbddc->local_primal_size++; 4617 } 4618 4619 /* check if a new primal space has been introduced (also take into account benign trick) */ 4620 pcbddc->new_primal_space_local = PETSC_TRUE; 4621 if (olocal_primal_size == pcbddc->local_primal_size) { 4622 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4623 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4624 if (!pcbddc->new_primal_space_local) { 4625 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4626 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4627 } 4628 } 4629 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4630 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4631 } 4632 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4633 4634 /* flush dbg viewer */ 4635 if (pcbddc->dbg_flag) { 4636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4637 } 4638 4639 /* free workspace */ 4640 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4641 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4642 if (!pcbddc->adaptive_selection) { 4643 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4644 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4645 } else { 4646 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4647 pcbddc->adaptive_constraints_idxs_ptr, 4648 pcbddc->adaptive_constraints_data_ptr, 4649 pcbddc->adaptive_constraints_idxs, 4650 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4651 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4652 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4653 } 4654 PetscFunctionReturn(0); 4655 } 4656 4657 #undef __FUNCT__ 4658 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4659 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4660 { 4661 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4662 PC_IS *pcis = (PC_IS*)pc->data; 4663 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4664 PetscInt ierr,i,N; 4665 4666 PetscFunctionBegin; 4667 /* Reset previously computed graph */ 4668 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4669 /* Init local Graph struct */ 4670 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4671 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4672 4673 /* Check validity of the csr graph passed in by the user */ 4674 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4675 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4676 } 4677 4678 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4679 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4680 PetscInt *xadj,*adjncy; 4681 PetscInt nvtxs; 4682 PetscBool flg_row=PETSC_FALSE; 4683 4684 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4685 if (flg_row) { 4686 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4687 pcbddc->computed_rowadj = PETSC_TRUE; 4688 } 4689 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4690 } 4691 if (pcbddc->dbg_flag) { 4692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4693 } 4694 4695 /* Setup of Graph */ 4696 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4697 4698 /* attach info on disconnected subdomains if present */ 4699 if (pcbddc->n_local_subs) { 4700 PetscInt *local_subs; 4701 4702 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4703 for (i=0;i<pcbddc->n_local_subs;i++) { 4704 const PetscInt *idxs; 4705 PetscInt nl,j; 4706 4707 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4708 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4709 for (j=0;j<nl;j++) { 4710 local_subs[idxs[j]] = i; 4711 } 4712 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4713 } 4714 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4715 pcbddc->mat_graph->local_subs = local_subs; 4716 } 4717 4718 /* Graph's connected components analysis */ 4719 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4720 PetscFunctionReturn(0); 4721 } 4722 4723 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4724 #undef __FUNCT__ 4725 #define __FUNCT__ "PCBDDCSubsetNumbering" 4726 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4727 { 4728 PetscSF sf; 4729 PetscLayout map; 4730 const PetscInt *idxs; 4731 PetscInt *leaf_data,*root_data,*gidxs; 4732 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4733 PetscInt n_n,nlocals,start,first_index; 4734 PetscMPIInt commsize; 4735 PetscBool first_found; 4736 PetscErrorCode ierr; 4737 4738 PetscFunctionBegin; 4739 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4740 if (subset_mult) { 4741 PetscCheckSameComm(subset,1,subset_mult,2); 4742 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4743 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4744 } 4745 /* create workspace layout for computing global indices of subset */ 4746 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4747 lbounds[0] = lbounds[1] = 0; 4748 for (i=0;i<n;i++) { 4749 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4750 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4751 } 4752 lbounds[0] = -lbounds[0]; 4753 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4754 gbounds[0] = -gbounds[0]; 4755 N = gbounds[1] - gbounds[0] + 1; 4756 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4757 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4758 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4759 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4760 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4761 4762 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4763 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4764 if (subset_mult) { 4765 const PetscInt* idxs_mult; 4766 4767 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4768 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4769 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4770 } else { 4771 for (i=0;i<n;i++) leaf_data[i] = 1; 4772 } 4773 /* local size of new subset */ 4774 n_n = 0; 4775 for (i=0;i<n;i++) n_n += leaf_data[i]; 4776 4777 /* global indexes in layout */ 4778 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4779 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4780 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4781 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4782 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4783 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4784 4785 /* reduce from leaves to roots */ 4786 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4787 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4788 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4789 4790 /* count indexes in local part of layout */ 4791 nlocals = 0; 4792 first_index = -1; 4793 first_found = PETSC_FALSE; 4794 for (i=0;i<Nl;i++) { 4795 if (!first_found && root_data[i]) { 4796 first_found = PETSC_TRUE; 4797 first_index = i; 4798 } 4799 nlocals += root_data[i]; 4800 } 4801 4802 /* cumulative of number of indexes and size of subset without holes */ 4803 #if defined(PETSC_HAVE_MPI_EXSCAN) 4804 start = 0; 4805 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4806 #else 4807 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4808 start = start-nlocals; 4809 #endif 4810 4811 if (N_n) { /* compute total size of new subset if requested */ 4812 *N_n = start + nlocals; 4813 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4814 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4815 } 4816 4817 /* adapt root data with cumulative */ 4818 if (first_found) { 4819 PetscInt old_index; 4820 4821 root_data[first_index] += start; 4822 old_index = first_index; 4823 for (i=first_index+1;i<Nl;i++) { 4824 if (root_data[i]) { 4825 root_data[i] += root_data[old_index]; 4826 old_index = i; 4827 } 4828 } 4829 } 4830 4831 /* from roots to leaves */ 4832 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4833 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4834 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4835 4836 /* create new IS with global indexes without holes */ 4837 if (subset_mult) { 4838 const PetscInt* idxs_mult; 4839 PetscInt cum; 4840 4841 cum = 0; 4842 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4843 for (i=0;i<n;i++) { 4844 PetscInt j; 4845 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4846 } 4847 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4848 } else { 4849 for (i=0;i<n;i++) { 4850 gidxs[i] = leaf_data[i]-1; 4851 } 4852 } 4853 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4854 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4855 PetscFunctionReturn(0); 4856 } 4857 4858 #undef __FUNCT__ 4859 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4860 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4861 { 4862 PetscInt i,j; 4863 PetscScalar *alphas; 4864 PetscErrorCode ierr; 4865 4866 PetscFunctionBegin; 4867 /* this implements stabilized Gram-Schmidt */ 4868 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4869 for (i=0;i<n;i++) { 4870 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4871 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4872 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4873 } 4874 ierr = PetscFree(alphas);CHKERRQ(ierr); 4875 PetscFunctionReturn(0); 4876 } 4877 4878 #undef __FUNCT__ 4879 #define __FUNCT__ "MatISGetSubassemblingPattern" 4880 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4881 { 4882 Mat A; 4883 PetscInt n_neighs,*neighs,*n_shared,**shared; 4884 PetscMPIInt size,rank,color; 4885 PetscInt *xadj,*adjncy; 4886 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4887 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4888 PetscInt void_procs,*procs_candidates = NULL; 4889 PetscInt xadj_count, *count; 4890 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4891 PetscSubcomm psubcomm; 4892 MPI_Comm subcomm; 4893 PetscErrorCode ierr; 4894 4895 PetscFunctionBegin; 4896 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4897 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4898 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4899 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4900 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4901 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4902 4903 if (have_void) *have_void = PETSC_FALSE; 4904 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4905 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4906 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4907 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4908 im_active = !!(n); 4909 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4910 void_procs = size - active_procs; 4911 /* get ranks of of non-active processes in mat communicator */ 4912 if (void_procs) { 4913 PetscInt ncand; 4914 4915 if (have_void) *have_void = PETSC_TRUE; 4916 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4917 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4918 for (i=0,ncand=0;i<size;i++) { 4919 if (!procs_candidates[i]) { 4920 procs_candidates[ncand++] = i; 4921 } 4922 } 4923 /* force n_subdomains to be not greater that the number of non-active processes */ 4924 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4925 } 4926 4927 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4928 if (active_procs < *n_subdomains) { 4929 PetscInt issize,isidx; 4930 if (im_active) { 4931 issize = 1; 4932 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4933 isidx = procs_candidates[rank]; 4934 } else { 4935 isidx = rank; 4936 } 4937 } else { 4938 issize = 0; 4939 isidx = -1; 4940 } 4941 *n_subdomains = active_procs; 4942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4943 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4944 PetscFunctionReturn(0); 4945 } 4946 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4947 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4948 threshold = PetscMax(threshold,2); 4949 4950 /* Get info on mapping */ 4951 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4952 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4953 4954 /* build local CSR graph of subdomains' connectivity */ 4955 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4956 xadj[0] = 0; 4957 xadj[1] = PetscMax(n_neighs-1,0); 4958 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4959 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4960 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4961 for (i=1;i<n_neighs;i++) 4962 for (j=0;j<n_shared[i];j++) 4963 count[shared[i][j]] += 1; 4964 4965 xadj_count = 0; 4966 for (i=1;i<n_neighs;i++) { 4967 for (j=0;j<n_shared[i];j++) { 4968 if (count[shared[i][j]] < threshold) { 4969 adjncy[xadj_count] = neighs[i]; 4970 adjncy_wgt[xadj_count] = n_shared[i]; 4971 xadj_count++; 4972 break; 4973 } 4974 } 4975 } 4976 xadj[1] = xadj_count; 4977 ierr = PetscFree(count);CHKERRQ(ierr); 4978 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4979 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4980 4981 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4982 4983 /* Restrict work on active processes only */ 4984 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4985 if (void_procs) { 4986 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4987 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4988 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4989 subcomm = PetscSubcommChild(psubcomm); 4990 } else { 4991 psubcomm = NULL; 4992 subcomm = PetscObjectComm((PetscObject)mat); 4993 } 4994 4995 v_wgt = NULL; 4996 if (!color) { 4997 ierr = PetscFree(xadj);CHKERRQ(ierr); 4998 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4999 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5000 } else { 5001 Mat subdomain_adj; 5002 IS new_ranks,new_ranks_contig; 5003 MatPartitioning partitioner; 5004 PetscInt rstart=0,rend=0; 5005 PetscInt *is_indices,*oldranks; 5006 PetscMPIInt size; 5007 PetscBool aggregate; 5008 5009 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5010 if (void_procs) { 5011 PetscInt prank = rank; 5012 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5013 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5014 for (i=0;i<xadj[1];i++) { 5015 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5016 } 5017 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5018 } else { 5019 oldranks = NULL; 5020 } 5021 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5022 if (aggregate) { /* TODO: all this part could be made more efficient */ 5023 PetscInt lrows,row,ncols,*cols; 5024 PetscMPIInt nrank; 5025 PetscScalar *vals; 5026 5027 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5028 lrows = 0; 5029 if (nrank<redprocs) { 5030 lrows = size/redprocs; 5031 if (nrank<size%redprocs) lrows++; 5032 } 5033 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5034 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5035 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5036 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5037 row = nrank; 5038 ncols = xadj[1]-xadj[0]; 5039 cols = adjncy; 5040 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5041 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5042 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5043 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5044 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5045 ierr = PetscFree(xadj);CHKERRQ(ierr); 5046 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5047 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5048 ierr = PetscFree(vals);CHKERRQ(ierr); 5049 if (use_vwgt) { 5050 Vec v; 5051 const PetscScalar *array; 5052 PetscInt nl; 5053 5054 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5055 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5056 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5057 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5058 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5059 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5060 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5061 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5062 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5063 ierr = VecDestroy(&v);CHKERRQ(ierr); 5064 } 5065 } else { 5066 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5067 if (use_vwgt) { 5068 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5069 v_wgt[0] = local_size; 5070 } 5071 } 5072 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5073 5074 /* Partition */ 5075 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5076 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5077 if (v_wgt) { 5078 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5079 } 5080 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5081 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5082 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5083 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5084 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5085 5086 /* renumber new_ranks to avoid "holes" in new set of processors */ 5087 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5088 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5089 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5090 if (!aggregate) { 5091 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5092 #if defined(PETSC_USE_DEBUG) 5093 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5094 #endif 5095 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5096 } else if (oldranks) { 5097 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5098 } else { 5099 ranks_send_to_idx[0] = is_indices[0]; 5100 } 5101 } else { 5102 PetscInt idxs[1]; 5103 PetscMPIInt tag; 5104 MPI_Request *reqs; 5105 5106 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5107 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5108 for (i=rstart;i<rend;i++) { 5109 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5110 } 5111 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5112 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5113 ierr = PetscFree(reqs);CHKERRQ(ierr); 5114 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5115 #if defined(PETSC_USE_DEBUG) 5116 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5117 #endif 5118 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5119 } else if (oldranks) { 5120 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5121 } else { 5122 ranks_send_to_idx[0] = idxs[0]; 5123 } 5124 } 5125 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5126 /* clean up */ 5127 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5128 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5129 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5130 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5131 } 5132 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5133 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5134 5135 /* assemble parallel IS for sends */ 5136 i = 1; 5137 if (!color) i=0; 5138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5139 PetscFunctionReturn(0); 5140 } 5141 5142 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5143 5144 #undef __FUNCT__ 5145 #define __FUNCT__ "MatISSubassemble" 5146 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5147 { 5148 Mat local_mat; 5149 IS is_sends_internal; 5150 PetscInt rows,cols,new_local_rows; 5151 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5152 PetscBool ismatis,isdense,newisdense,destroy_mat; 5153 ISLocalToGlobalMapping l2gmap; 5154 PetscInt* l2gmap_indices; 5155 const PetscInt* is_indices; 5156 MatType new_local_type; 5157 /* buffers */ 5158 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5159 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5160 PetscInt *recv_buffer_idxs_local; 5161 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5162 /* MPI */ 5163 MPI_Comm comm,comm_n; 5164 PetscSubcomm subcomm; 5165 PetscMPIInt n_sends,n_recvs,commsize; 5166 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5167 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5168 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5169 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5170 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5171 PetscErrorCode ierr; 5172 5173 PetscFunctionBegin; 5174 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5175 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5176 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5177 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5178 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5179 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5180 PetscValidLogicalCollectiveBool(mat,reuse,6); 5181 PetscValidLogicalCollectiveInt(mat,nis,8); 5182 5183 /* further checks */ 5184 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5185 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5186 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5187 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5188 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5189 if (reuse && *mat_n) { 5190 PetscInt mrows,mcols,mnrows,mncols; 5191 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5192 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5193 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5194 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5195 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5196 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5197 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5198 } 5199 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5200 PetscValidLogicalCollectiveInt(mat,bs,0); 5201 5202 /* prepare IS for sending if not provided */ 5203 if (!is_sends) { 5204 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5205 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5206 } else { 5207 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5208 is_sends_internal = is_sends; 5209 } 5210 5211 /* get comm */ 5212 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5213 5214 /* compute number of sends */ 5215 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5216 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5217 5218 /* compute number of receives */ 5219 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5220 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5221 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5222 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5223 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5224 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5225 ierr = PetscFree(iflags);CHKERRQ(ierr); 5226 5227 /* restrict comm if requested */ 5228 subcomm = 0; 5229 destroy_mat = PETSC_FALSE; 5230 if (restrict_comm) { 5231 PetscMPIInt color,subcommsize; 5232 5233 color = 0; 5234 if (restrict_full) { 5235 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5236 } else { 5237 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5238 } 5239 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5240 subcommsize = commsize - subcommsize; 5241 /* check if reuse has been requested */ 5242 if (reuse) { 5243 if (*mat_n) { 5244 PetscMPIInt subcommsize2; 5245 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5246 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5247 comm_n = PetscObjectComm((PetscObject)*mat_n); 5248 } else { 5249 comm_n = PETSC_COMM_SELF; 5250 } 5251 } else { /* MAT_INITIAL_MATRIX */ 5252 PetscMPIInt rank; 5253 5254 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5255 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5256 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5257 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5258 comm_n = PetscSubcommChild(subcomm); 5259 } 5260 /* flag to destroy *mat_n if not significative */ 5261 if (color) destroy_mat = PETSC_TRUE; 5262 } else { 5263 comm_n = comm; 5264 } 5265 5266 /* prepare send/receive buffers */ 5267 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5268 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5269 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5270 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5271 if (nis) { 5272 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5273 } 5274 5275 /* Get data from local matrices */ 5276 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5277 /* TODO: See below some guidelines on how to prepare the local buffers */ 5278 /* 5279 send_buffer_vals should contain the raw values of the local matrix 5280 send_buffer_idxs should contain: 5281 - MatType_PRIVATE type 5282 - PetscInt size_of_l2gmap 5283 - PetscInt global_row_indices[size_of_l2gmap] 5284 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5285 */ 5286 else { 5287 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5288 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5289 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5290 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5291 send_buffer_idxs[1] = i; 5292 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5293 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5294 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5295 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5296 for (i=0;i<n_sends;i++) { 5297 ilengths_vals[is_indices[i]] = len*len; 5298 ilengths_idxs[is_indices[i]] = len+2; 5299 } 5300 } 5301 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5302 /* additional is (if any) */ 5303 if (nis) { 5304 PetscMPIInt psum; 5305 PetscInt j; 5306 for (j=0,psum=0;j<nis;j++) { 5307 PetscInt plen; 5308 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5309 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5310 psum += len+1; /* indices + lenght */ 5311 } 5312 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5313 for (j=0,psum=0;j<nis;j++) { 5314 PetscInt plen; 5315 const PetscInt *is_array_idxs; 5316 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5317 send_buffer_idxs_is[psum] = plen; 5318 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5319 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5320 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5321 psum += plen+1; /* indices + lenght */ 5322 } 5323 for (i=0;i<n_sends;i++) { 5324 ilengths_idxs_is[is_indices[i]] = psum; 5325 } 5326 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5327 } 5328 5329 buf_size_idxs = 0; 5330 buf_size_vals = 0; 5331 buf_size_idxs_is = 0; 5332 for (i=0;i<n_recvs;i++) { 5333 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5334 buf_size_vals += (PetscInt)olengths_vals[i]; 5335 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5336 } 5337 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5338 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5339 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5340 5341 /* get new tags for clean communications */ 5342 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5343 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5344 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5345 5346 /* allocate for requests */ 5347 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5348 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5349 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5350 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5351 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5352 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5353 5354 /* communications */ 5355 ptr_idxs = recv_buffer_idxs; 5356 ptr_vals = recv_buffer_vals; 5357 ptr_idxs_is = recv_buffer_idxs_is; 5358 for (i=0;i<n_recvs;i++) { 5359 source_dest = onodes[i]; 5360 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5361 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5362 ptr_idxs += olengths_idxs[i]; 5363 ptr_vals += olengths_vals[i]; 5364 if (nis) { 5365 source_dest = onodes_is[i]; 5366 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5367 ptr_idxs_is += olengths_idxs_is[i]; 5368 } 5369 } 5370 for (i=0;i<n_sends;i++) { 5371 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5372 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5373 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5374 if (nis) { 5375 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5376 } 5377 } 5378 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5379 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5380 5381 /* assemble new l2g map */ 5382 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5383 ptr_idxs = recv_buffer_idxs; 5384 new_local_rows = 0; 5385 for (i=0;i<n_recvs;i++) { 5386 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5387 ptr_idxs += olengths_idxs[i]; 5388 } 5389 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5390 ptr_idxs = recv_buffer_idxs; 5391 new_local_rows = 0; 5392 for (i=0;i<n_recvs;i++) { 5393 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5394 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5395 ptr_idxs += olengths_idxs[i]; 5396 } 5397 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5398 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5399 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5400 5401 /* infer new local matrix type from received local matrices type */ 5402 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5403 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5404 if (n_recvs) { 5405 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5406 ptr_idxs = recv_buffer_idxs; 5407 for (i=0;i<n_recvs;i++) { 5408 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5409 new_local_type_private = MATAIJ_PRIVATE; 5410 break; 5411 } 5412 ptr_idxs += olengths_idxs[i]; 5413 } 5414 switch (new_local_type_private) { 5415 case MATDENSE_PRIVATE: 5416 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5417 new_local_type = MATSEQAIJ; 5418 bs = 1; 5419 } else { /* if I receive only 1 dense matrix */ 5420 new_local_type = MATSEQDENSE; 5421 bs = 1; 5422 } 5423 break; 5424 case MATAIJ_PRIVATE: 5425 new_local_type = MATSEQAIJ; 5426 bs = 1; 5427 break; 5428 case MATBAIJ_PRIVATE: 5429 new_local_type = MATSEQBAIJ; 5430 break; 5431 case MATSBAIJ_PRIVATE: 5432 new_local_type = MATSEQSBAIJ; 5433 break; 5434 default: 5435 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5436 break; 5437 } 5438 } else { /* by default, new_local_type is seqdense */ 5439 new_local_type = MATSEQDENSE; 5440 bs = 1; 5441 } 5442 5443 /* create MATIS object if needed */ 5444 if (!reuse) { 5445 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5446 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5447 } else { 5448 /* it also destroys the local matrices */ 5449 if (*mat_n) { 5450 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5451 } else { /* this is a fake object */ 5452 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5453 } 5454 } 5455 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5456 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5457 5458 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5459 5460 /* Global to local map of received indices */ 5461 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5462 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5463 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5464 5465 /* restore attributes -> type of incoming data and its size */ 5466 buf_size_idxs = 0; 5467 for (i=0;i<n_recvs;i++) { 5468 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5469 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5470 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5471 } 5472 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5473 5474 /* set preallocation */ 5475 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5476 if (!newisdense) { 5477 PetscInt *new_local_nnz=0; 5478 5479 ptr_idxs = recv_buffer_idxs_local; 5480 if (n_recvs) { 5481 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5482 } 5483 for (i=0;i<n_recvs;i++) { 5484 PetscInt j; 5485 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5486 for (j=0;j<*(ptr_idxs+1);j++) { 5487 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5488 } 5489 } else { 5490 /* TODO */ 5491 } 5492 ptr_idxs += olengths_idxs[i]; 5493 } 5494 if (new_local_nnz) { 5495 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5496 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5497 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5498 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5499 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5500 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5501 } else { 5502 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5503 } 5504 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5505 } else { 5506 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5507 } 5508 5509 /* set values */ 5510 ptr_vals = recv_buffer_vals; 5511 ptr_idxs = recv_buffer_idxs_local; 5512 for (i=0;i<n_recvs;i++) { 5513 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5514 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5515 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5516 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5517 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5518 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5519 } else { 5520 /* TODO */ 5521 } 5522 ptr_idxs += olengths_idxs[i]; 5523 ptr_vals += olengths_vals[i]; 5524 } 5525 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5527 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5528 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5529 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5530 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5531 5532 #if 0 5533 if (!restrict_comm) { /* check */ 5534 Vec lvec,rvec; 5535 PetscReal infty_error; 5536 5537 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5538 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5539 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5540 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5541 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5542 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5543 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5544 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5545 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5546 } 5547 #endif 5548 5549 /* assemble new additional is (if any) */ 5550 if (nis) { 5551 PetscInt **temp_idxs,*count_is,j,psum; 5552 5553 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5554 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5555 ptr_idxs = recv_buffer_idxs_is; 5556 psum = 0; 5557 for (i=0;i<n_recvs;i++) { 5558 for (j=0;j<nis;j++) { 5559 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5560 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5561 psum += plen; 5562 ptr_idxs += plen+1; /* shift pointer to received data */ 5563 } 5564 } 5565 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5566 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5567 for (i=1;i<nis;i++) { 5568 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5569 } 5570 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5571 ptr_idxs = recv_buffer_idxs_is; 5572 for (i=0;i<n_recvs;i++) { 5573 for (j=0;j<nis;j++) { 5574 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5575 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5576 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5577 ptr_idxs += plen+1; /* shift pointer to received data */ 5578 } 5579 } 5580 for (i=0;i<nis;i++) { 5581 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5582 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5583 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5584 } 5585 ierr = PetscFree(count_is);CHKERRQ(ierr); 5586 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5587 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5588 } 5589 /* free workspace */ 5590 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5591 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5592 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5593 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5594 if (isdense) { 5595 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5596 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5597 } else { 5598 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5599 } 5600 if (nis) { 5601 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5602 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5603 } 5604 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5605 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5606 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5607 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5608 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5609 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5610 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5611 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5612 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5613 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5614 ierr = PetscFree(onodes);CHKERRQ(ierr); 5615 if (nis) { 5616 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5617 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5618 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5619 } 5620 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5621 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5622 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5623 for (i=0;i<nis;i++) { 5624 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5625 } 5626 *mat_n = NULL; 5627 } 5628 PetscFunctionReturn(0); 5629 } 5630 5631 /* temporary hack into ksp private data structure */ 5632 #include <petsc/private/kspimpl.h> 5633 5634 #undef __FUNCT__ 5635 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5636 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5637 { 5638 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5639 PC_IS *pcis = (PC_IS*)pc->data; 5640 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5641 MatNullSpace CoarseNullSpace = NULL; 5642 ISLocalToGlobalMapping coarse_islg; 5643 IS coarse_is,*isarray; 5644 PetscInt i,im_active=-1,active_procs=-1; 5645 PetscInt nis,nisdofs,nisneu,nisvert; 5646 PC pc_temp; 5647 PCType coarse_pc_type; 5648 KSPType coarse_ksp_type; 5649 PetscBool multilevel_requested,multilevel_allowed; 5650 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5651 Mat t_coarse_mat_is; 5652 PetscInt ncoarse; 5653 PetscBool compute_vecs = PETSC_FALSE; 5654 PetscScalar *array; 5655 MatReuse coarse_mat_reuse; 5656 PetscBool restr, full_restr, have_void; 5657 PetscErrorCode ierr; 5658 5659 PetscFunctionBegin; 5660 /* Assign global numbering to coarse dofs */ 5661 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5662 PetscInt ocoarse_size; 5663 compute_vecs = PETSC_TRUE; 5664 ocoarse_size = pcbddc->coarse_size; 5665 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5666 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5667 /* see if we can avoid some work */ 5668 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5669 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5670 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5671 PC pc; 5672 PetscBool isbddc; 5673 5674 /* temporary workaround since PCBDDC does not have a reset method so far */ 5675 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5676 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5677 if (isbddc) { 5678 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5679 } else { 5680 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5681 } 5682 coarse_reuse = PETSC_FALSE; 5683 } else { /* we can safely reuse already computed coarse matrix */ 5684 coarse_reuse = PETSC_TRUE; 5685 } 5686 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5687 coarse_reuse = PETSC_FALSE; 5688 } 5689 /* reset any subassembling information */ 5690 if (!coarse_reuse || pcbddc->recompute_topography) { 5691 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5692 } 5693 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5694 coarse_reuse = PETSC_TRUE; 5695 } 5696 /* assemble coarse matrix */ 5697 if (coarse_reuse && pcbddc->coarse_ksp) { 5698 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5699 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5700 coarse_mat_reuse = MAT_REUSE_MATRIX; 5701 } else { 5702 coarse_mat = NULL; 5703 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5704 } 5705 5706 /* creates temporary l2gmap and IS for coarse indexes */ 5707 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5708 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5709 5710 /* creates temporary MATIS object for coarse matrix */ 5711 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5712 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5713 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5714 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5715 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5716 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5717 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5719 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5720 5721 /* count "active" (i.e. with positive local size) and "void" processes */ 5722 im_active = !!(pcis->n); 5723 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5724 5725 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5726 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5727 /* full_restr : just use the receivers from the subassembling pattern */ 5728 coarse_mat_is = NULL; 5729 multilevel_allowed = PETSC_FALSE; 5730 multilevel_requested = PETSC_FALSE; 5731 full_restr = PETSC_TRUE; 5732 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5733 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5734 if (multilevel_requested) { 5735 ncoarse = active_procs/pcbddc->coarsening_ratio; 5736 restr = PETSC_FALSE; 5737 full_restr = PETSC_FALSE; 5738 } else { 5739 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5740 restr = PETSC_TRUE; 5741 full_restr = PETSC_TRUE; 5742 } 5743 ncoarse = PetscMax(1,ncoarse); 5744 if (!pcbddc->coarse_subassembling) { 5745 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5746 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5747 PetscInt psum; 5748 PetscMPIInt size; 5749 if (pcbddc->coarse_ksp) psum = 1; 5750 else psum = 0; 5751 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5752 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5753 if (ncoarse < size) have_void = PETSC_TRUE; 5754 } 5755 /* determine if we can go multilevel */ 5756 if (multilevel_requested) { 5757 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5758 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5759 } 5760 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5761 5762 /* dump subassembling pattern */ 5763 if (pcbddc->dbg_flag && multilevel_allowed) { 5764 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5765 } 5766 5767 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5768 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5769 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5770 const PetscInt *idxs; 5771 ISLocalToGlobalMapping tmap; 5772 5773 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5774 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5775 /* allocate space for temporary storage */ 5776 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5777 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5778 /* allocate for IS array */ 5779 nisdofs = pcbddc->n_ISForDofsLocal; 5780 nisneu = !!pcbddc->NeumannBoundariesLocal; 5781 nisvert = 0; /* nisvert is not used */ 5782 nis = nisdofs + nisneu + nisvert; 5783 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5784 /* dofs splitting */ 5785 for (i=0;i<nisdofs;i++) { 5786 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5787 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5788 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5789 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5790 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5791 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5792 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5793 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5794 } 5795 /* neumann boundaries */ 5796 if (pcbddc->NeumannBoundariesLocal) { 5797 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5798 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5799 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5800 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5801 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5802 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5803 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5804 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5805 } 5806 /* free memory */ 5807 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5808 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5809 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5810 } else { 5811 nis = 0; 5812 nisdofs = 0; 5813 nisneu = 0; 5814 nisvert = 0; 5815 isarray = NULL; 5816 } 5817 /* destroy no longer needed map */ 5818 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5819 5820 /* subassemble */ 5821 if (multilevel_allowed) { 5822 PetscBool reuse,reuser; 5823 if (coarse_mat) reuse = PETSC_TRUE; 5824 else reuse = PETSC_FALSE; 5825 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5826 if (reuser) { 5827 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5828 } else { 5829 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5830 } 5831 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5832 } else { 5833 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5834 } 5835 if (coarse_mat_is || coarse_mat) { 5836 PetscMPIInt size; 5837 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5838 if (!multilevel_allowed) { 5839 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5840 } else { 5841 Mat A; 5842 5843 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5844 if (coarse_mat_is) { 5845 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5846 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5847 coarse_mat = coarse_mat_is; 5848 } 5849 /* be sure we don't have MatSeqDENSE as local mat */ 5850 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5851 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5852 } 5853 } 5854 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5855 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5856 5857 /* create local to global scatters for coarse problem */ 5858 if (compute_vecs) { 5859 PetscInt lrows; 5860 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5861 if (coarse_mat) { 5862 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5863 } else { 5864 lrows = 0; 5865 } 5866 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5867 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5868 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5869 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5870 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5871 } 5872 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5873 5874 /* set defaults for coarse KSP and PC */ 5875 if (multilevel_allowed) { 5876 coarse_ksp_type = KSPRICHARDSON; 5877 coarse_pc_type = PCBDDC; 5878 } else { 5879 coarse_ksp_type = KSPPREONLY; 5880 coarse_pc_type = PCREDUNDANT; 5881 } 5882 5883 /* print some info if requested */ 5884 if (pcbddc->dbg_flag) { 5885 if (!multilevel_allowed) { 5886 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5887 if (multilevel_requested) { 5888 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5889 } else if (pcbddc->max_levels) { 5890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5891 } 5892 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5893 } 5894 } 5895 5896 /* create the coarse KSP object only once with defaults */ 5897 if (coarse_mat) { 5898 PetscViewer dbg_viewer = NULL; 5899 if (pcbddc->dbg_flag) { 5900 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5901 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5902 } 5903 if (!pcbddc->coarse_ksp) { 5904 char prefix[256],str_level[16]; 5905 size_t len; 5906 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5907 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5908 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5909 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5910 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5911 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5912 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5913 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5914 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5915 /* prefix */ 5916 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5917 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5918 if (!pcbddc->current_level) { 5919 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5920 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5921 } else { 5922 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5923 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5924 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5925 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5926 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5927 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5928 } 5929 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5930 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5931 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5932 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5933 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5934 /* allow user customization */ 5935 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5936 } 5937 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5938 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5939 if (nisdofs) { 5940 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5941 for (i=0;i<nisdofs;i++) { 5942 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5943 } 5944 } 5945 if (nisneu) { 5946 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5947 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5948 } 5949 if (nisvert) { 5950 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5951 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5952 } 5953 5954 /* get some info after set from options */ 5955 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5956 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5957 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5958 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5959 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5960 isbddc = PETSC_FALSE; 5961 } 5962 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5963 if (isredundant) { 5964 KSP inner_ksp; 5965 PC inner_pc; 5966 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5967 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5968 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5969 } 5970 5971 /* parameters which miss an API */ 5972 if (isbddc) { 5973 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5974 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5975 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5976 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5977 if (pcbddc_coarse->benign_saddle_point) { 5978 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5979 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5980 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5981 } 5982 } 5983 5984 /* propagate symmetry info of coarse matrix */ 5985 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5986 if (pc->pmat->symmetric_set) { 5987 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5988 } 5989 if (pc->pmat->hermitian_set) { 5990 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5991 } 5992 if (pc->pmat->spd_set) { 5993 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5994 } 5995 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5996 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5997 } 5998 /* set operators */ 5999 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6000 if (pcbddc->dbg_flag) { 6001 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6002 } 6003 } 6004 ierr = PetscFree(isarray);CHKERRQ(ierr); 6005 #if 0 6006 { 6007 PetscViewer viewer; 6008 char filename[256]; 6009 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6010 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6011 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6012 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6013 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6014 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6015 } 6016 #endif 6017 6018 if (pcbddc->coarse_ksp) { 6019 Vec crhs,csol; 6020 6021 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6022 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6023 if (!csol) { 6024 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6025 } 6026 if (!crhs) { 6027 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6028 } 6029 } 6030 6031 /* compute null space for coarse solver if the benign trick has been requested */ 6032 if (pcbddc->benign_null) { 6033 6034 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6035 for (i=0;i<pcbddc->benign_n;i++) { 6036 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6037 } 6038 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6039 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6040 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6041 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6042 if (coarse_mat) { 6043 Vec nullv; 6044 PetscScalar *array,*array2; 6045 PetscInt nl; 6046 6047 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6048 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6049 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6050 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6051 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6052 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6053 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6054 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6055 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6056 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6057 } 6058 } 6059 6060 if (pcbddc->coarse_ksp) { 6061 PetscBool ispreonly; 6062 6063 if (CoarseNullSpace) { 6064 PetscBool isnull; 6065 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6066 if (isnull) { 6067 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6068 } 6069 /* TODO: add local nullspaces (if any) */ 6070 } 6071 /* setup coarse ksp */ 6072 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6073 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6074 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6075 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6076 KSP check_ksp; 6077 KSPType check_ksp_type; 6078 PC check_pc; 6079 Vec check_vec,coarse_vec; 6080 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6081 PetscInt its; 6082 PetscBool compute_eigs; 6083 PetscReal *eigs_r,*eigs_c; 6084 PetscInt neigs; 6085 const char *prefix; 6086 6087 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6088 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6089 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6090 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6091 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6092 /* prevent from setup unneeded object */ 6093 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6094 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6095 if (ispreonly) { 6096 check_ksp_type = KSPPREONLY; 6097 compute_eigs = PETSC_FALSE; 6098 } else { 6099 check_ksp_type = KSPGMRES; 6100 compute_eigs = PETSC_TRUE; 6101 } 6102 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6103 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6104 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6105 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6106 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6107 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6108 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6109 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6110 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6111 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6112 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6113 /* create random vec */ 6114 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6115 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6116 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6117 /* solve coarse problem */ 6118 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6119 /* set eigenvalue estimation if preonly has not been requested */ 6120 if (compute_eigs) { 6121 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6122 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6123 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6124 lambda_max = eigs_r[neigs-1]; 6125 lambda_min = eigs_r[0]; 6126 if (pcbddc->use_coarse_estimates) { 6127 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6128 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6129 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6130 } 6131 } 6132 } 6133 6134 /* check coarse problem residual error */ 6135 if (pcbddc->dbg_flag) { 6136 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6137 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6138 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6139 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6140 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6141 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6142 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6143 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6144 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6145 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6146 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6147 if (CoarseNullSpace) { 6148 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6149 } 6150 if (compute_eigs) { 6151 PetscReal lambda_max_s,lambda_min_s; 6152 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6153 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6154 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6155 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6156 for (i=0;i<neigs;i++) { 6157 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6158 } 6159 } 6160 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6161 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6162 } 6163 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6164 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6165 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6166 if (compute_eigs) { 6167 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6168 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6169 } 6170 } 6171 } 6172 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6173 /* print additional info */ 6174 if (pcbddc->dbg_flag) { 6175 /* waits until all processes reaches this point */ 6176 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6177 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6179 } 6180 6181 /* free memory */ 6182 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6183 PetscFunctionReturn(0); 6184 } 6185 6186 #undef __FUNCT__ 6187 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6188 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6189 { 6190 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6191 PC_IS* pcis = (PC_IS*)pc->data; 6192 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6193 IS subset,subset_mult,subset_n; 6194 PetscInt local_size,coarse_size=0; 6195 PetscInt *local_primal_indices=NULL; 6196 const PetscInt *t_local_primal_indices; 6197 PetscErrorCode ierr; 6198 6199 PetscFunctionBegin; 6200 /* Compute global number of coarse dofs */ 6201 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6202 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6203 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6204 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6205 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6206 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6207 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6208 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6209 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6210 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6211 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6212 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6213 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6214 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6215 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6216 6217 /* check numbering */ 6218 if (pcbddc->dbg_flag) { 6219 PetscScalar coarsesum,*array,*array2; 6220 PetscInt i; 6221 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6222 6223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6226 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6227 /* counter */ 6228 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6229 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6230 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6231 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6232 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6233 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6234 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6235 for (i=0;i<pcbddc->local_primal_size;i++) { 6236 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6237 } 6238 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6239 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6240 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6241 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6242 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6244 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6246 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6247 for (i=0;i<pcis->n;i++) { 6248 if (array[i] != 0.0 && array[i] != array2[i]) { 6249 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6250 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6251 set_error = PETSC_TRUE; 6252 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6253 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6254 } 6255 } 6256 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6257 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6258 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6259 for (i=0;i<pcis->n;i++) { 6260 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6261 } 6262 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6263 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6264 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6265 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6266 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6268 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6269 PetscInt *gidxs; 6270 6271 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6272 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6276 for (i=0;i<pcbddc->local_primal_size;i++) { 6277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6278 } 6279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6280 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6281 } 6282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6283 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6284 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6285 } 6286 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6287 /* get back data */ 6288 *coarse_size_n = coarse_size; 6289 *local_primal_indices_n = local_primal_indices; 6290 PetscFunctionReturn(0); 6291 } 6292 6293 #undef __FUNCT__ 6294 #define __FUNCT__ "PCBDDCGlobalToLocal" 6295 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6296 { 6297 IS localis_t; 6298 PetscInt i,lsize,*idxs,n; 6299 PetscScalar *vals; 6300 PetscErrorCode ierr; 6301 6302 PetscFunctionBegin; 6303 /* get indices in local ordering exploiting local to global map */ 6304 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6305 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6306 for (i=0;i<lsize;i++) vals[i] = 1.0; 6307 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6308 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6309 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6310 if (idxs) { /* multilevel guard */ 6311 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6312 } 6313 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6314 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6315 ierr = PetscFree(vals);CHKERRQ(ierr); 6316 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6317 /* now compute set in local ordering */ 6318 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6319 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6320 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6321 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6322 for (i=0,lsize=0;i<n;i++) { 6323 if (PetscRealPart(vals[i]) > 0.5) { 6324 lsize++; 6325 } 6326 } 6327 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6328 for (i=0,lsize=0;i<n;i++) { 6329 if (PetscRealPart(vals[i]) > 0.5) { 6330 idxs[lsize++] = i; 6331 } 6332 } 6333 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6334 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6335 *localis = localis_t; 6336 PetscFunctionReturn(0); 6337 } 6338 6339 #undef __FUNCT__ 6340 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6341 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6342 { 6343 PC_IS *pcis=(PC_IS*)pc->data; 6344 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6345 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6346 Mat S_j; 6347 PetscInt *used_xadj,*used_adjncy; 6348 PetscBool free_used_adj; 6349 PetscErrorCode ierr; 6350 6351 PetscFunctionBegin; 6352 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6353 free_used_adj = PETSC_FALSE; 6354 if (pcbddc->sub_schurs_layers == -1) { 6355 used_xadj = NULL; 6356 used_adjncy = NULL; 6357 } else { 6358 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6359 used_xadj = pcbddc->mat_graph->xadj; 6360 used_adjncy = pcbddc->mat_graph->adjncy; 6361 } else if (pcbddc->computed_rowadj) { 6362 used_xadj = pcbddc->mat_graph->xadj; 6363 used_adjncy = pcbddc->mat_graph->adjncy; 6364 } else { 6365 PetscBool flg_row=PETSC_FALSE; 6366 const PetscInt *xadj,*adjncy; 6367 PetscInt nvtxs; 6368 6369 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6370 if (flg_row) { 6371 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6372 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6373 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6374 free_used_adj = PETSC_TRUE; 6375 } else { 6376 pcbddc->sub_schurs_layers = -1; 6377 used_xadj = NULL; 6378 used_adjncy = NULL; 6379 } 6380 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6381 } 6382 } 6383 6384 /* setup sub_schurs data */ 6385 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6386 if (!sub_schurs->schur_explicit) { 6387 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6388 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6389 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6390 } else { 6391 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6392 PetscBool isseqaij,need_change = PETSC_FALSE;; 6393 PetscInt benign_n; 6394 Mat change = NULL; 6395 Vec scaling = NULL; 6396 IS change_primal = NULL; 6397 6398 if (!pcbddc->use_vertices && reuse_solvers) { 6399 PetscInt n_vertices; 6400 6401 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6402 reuse_solvers = (PetscBool)!n_vertices; 6403 } 6404 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6405 if (!isseqaij) { 6406 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6407 if (matis->A == pcbddc->local_mat) { 6408 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6409 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6410 } else { 6411 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6412 } 6413 } 6414 if (!pcbddc->benign_change_explicit) { 6415 benign_n = pcbddc->benign_n; 6416 } else { 6417 benign_n = 0; 6418 } 6419 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6420 We need a global reduction to avoid possible deadlocks. 6421 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6422 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6423 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6424 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6425 need_change = (PetscBool)(!need_change); 6426 } 6427 /* If the user defines additional constraints, we import them here. 6428 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6429 if (need_change) { 6430 PC_IS *pcisf; 6431 PC_BDDC *pcbddcf; 6432 PC pcf; 6433 6434 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6435 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6436 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6437 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6438 /* hacks */ 6439 pcisf = (PC_IS*)pcf->data; 6440 pcisf->is_B_local = pcis->is_B_local; 6441 pcisf->vec1_N = pcis->vec1_N; 6442 pcisf->BtoNmap = pcis->BtoNmap; 6443 pcisf->n = pcis->n; 6444 pcisf->n_B = pcis->n_B; 6445 pcbddcf = (PC_BDDC*)pcf->data; 6446 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6447 pcbddcf->mat_graph = pcbddc->mat_graph; 6448 pcbddcf->use_faces = PETSC_TRUE; 6449 pcbddcf->use_change_of_basis = PETSC_TRUE; 6450 pcbddcf->use_change_on_faces = PETSC_TRUE; 6451 pcbddcf->use_qr_single = PETSC_TRUE; 6452 pcbddcf->fake_change = PETSC_TRUE; 6453 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6454 /* store information on primal vertices and change of basis (in local numbering) */ 6455 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6456 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6457 change = pcbddcf->ConstraintMatrix; 6458 pcbddcf->ConstraintMatrix = NULL; 6459 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6460 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6461 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6462 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6463 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6464 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6465 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6466 pcf->ops->destroy = NULL; 6467 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6468 } 6469 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6470 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6471 ierr = MatDestroy(&change);CHKERRQ(ierr); 6472 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6473 } 6474 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6475 6476 /* free adjacency */ 6477 if (free_used_adj) { 6478 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6479 } 6480 PetscFunctionReturn(0); 6481 } 6482 6483 #undef __FUNCT__ 6484 #define __FUNCT__ "PCBDDCInitSubSchurs" 6485 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6486 { 6487 PC_IS *pcis=(PC_IS*)pc->data; 6488 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6489 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6490 PCBDDCGraph graph; 6491 PetscErrorCode ierr; 6492 6493 PetscFunctionBegin; 6494 /* attach interface graph for determining subsets */ 6495 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6496 IS verticesIS,verticescomm; 6497 PetscInt vsize,*idxs; 6498 6499 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6500 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6501 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6502 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6503 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6504 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6505 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6506 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6507 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6508 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6509 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6510 } else { 6511 graph = pcbddc->mat_graph; 6512 } 6513 /* print some info */ 6514 if (pcbddc->dbg_flag) { 6515 IS vertices; 6516 PetscInt nv,nedges,nfaces; 6517 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6518 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6519 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6520 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6521 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6527 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6528 } 6529 6530 /* sub_schurs init */ 6531 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6532 6533 /* free graph struct */ 6534 if (pcbddc->sub_schurs_rebuild) { 6535 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6536 } 6537 PetscFunctionReturn(0); 6538 } 6539 6540 #undef __FUNCT__ 6541 #define __FUNCT__ "PCBDDCCheckOperator" 6542 PetscErrorCode PCBDDCCheckOperator(PC pc) 6543 { 6544 PC_IS *pcis=(PC_IS*)pc->data; 6545 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6546 PetscErrorCode ierr; 6547 6548 PetscFunctionBegin; 6549 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6550 IS zerodiag = NULL; 6551 Mat S_j,B0_B=NULL; 6552 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6553 PetscScalar *p0_check,*array,*array2; 6554 PetscReal norm; 6555 PetscInt i; 6556 6557 /* B0 and B0_B */ 6558 if (zerodiag) { 6559 IS dummy; 6560 6561 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6562 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6563 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6564 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6565 } 6566 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6567 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6568 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6569 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6570 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6571 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6572 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6573 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6574 /* S_j */ 6575 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6576 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6577 6578 /* mimic vector in \widetilde{W}_\Gamma */ 6579 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6580 /* continuous in primal space */ 6581 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6582 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6583 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6584 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6585 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6586 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6587 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6588 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6589 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6590 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6591 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6592 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6593 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6594 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6595 6596 /* assemble rhs for coarse problem */ 6597 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6598 /* local with Schur */ 6599 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6600 if (zerodiag) { 6601 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6602 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6603 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6604 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6605 } 6606 /* sum on primal nodes the local contributions */ 6607 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6608 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6609 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6610 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6611 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6612 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6613 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6614 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6615 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6616 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6617 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6618 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6619 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6620 /* scale primal nodes (BDDC sums contibutions) */ 6621 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6622 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6623 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6624 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6625 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6626 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6627 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6628 /* global: \widetilde{B0}_B w_\Gamma */ 6629 if (zerodiag) { 6630 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6631 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6632 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6633 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6634 } 6635 /* BDDC */ 6636 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6637 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6638 6639 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6640 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6641 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6642 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6643 for (i=0;i<pcbddc->benign_n;i++) { 6644 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6645 } 6646 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6647 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6648 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6649 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6650 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6651 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6652 } 6653 PetscFunctionReturn(0); 6654 } 6655