1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 36 if (i > 1) { 37 pcbddc->n_ISForDofsLocal = i; 38 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 39 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 40 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 41 } 42 } 43 } 44 } 45 46 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 51 } 52 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 53 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 54 } 55 ierr = VecDestroy(&global);CHKERRQ(ierr); 56 ierr = VecDestroy(&local);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 62 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 63 { 64 PC_IS *pcis = (PC_IS*)(pc->data); 65 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 if (!pcbddc->benign_have_null) { 70 PetscFunctionReturn(0); 71 } 72 if (pcbddc->ChangeOfBasisMatrix) { 73 Vec swap; 74 75 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 76 swap = pcbddc->work_change; 77 pcbddc->work_change = r; 78 r = swap; 79 } 80 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82 if (pcbddc->benign_n) { 83 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 84 } else { 85 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 86 } 87 ierr = VecSet(z,0.);CHKERRQ(ierr); 88 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 89 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 90 if (pcbddc->ChangeOfBasisMatrix) { 91 Vec swap; 92 93 swap = r; 94 r = pcbddc->work_change; 95 pcbddc->work_change = swap; 96 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 97 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 104 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 105 { 106 PCBDDCBenignMatMult_ctx ctx; 107 PetscErrorCode ierr; 108 PetscBool apply_right,apply_left,reset_x; 109 110 PetscFunctionBegin; 111 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 112 if (transpose) { 113 apply_right = ctx->apply_left; 114 apply_left = ctx->apply_right; 115 } else { 116 apply_right = ctx->apply_right; 117 apply_left = ctx->apply_left; 118 } 119 reset_x = PETSC_FALSE; 120 if (apply_right) { 121 const PetscScalar *ax; 122 PetscInt nl,i; 123 124 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 125 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 126 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 128 for (i=0;i<ctx->benign_n;i++) { 129 PetscScalar sum,val; 130 const PetscInt *idxs; 131 PetscInt nz,j; 132 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 133 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 134 sum = 0.; 135 if (ctx->apply_p0) { 136 val = ctx->work[idxs[nz-1]]; 137 for (j=0;j<nz-1;j++) { 138 sum += ctx->work[idxs[j]]; 139 ctx->work[idxs[j]] += val; 140 } 141 } else { 142 for (j=0;j<nz-1;j++) { 143 sum += ctx->work[idxs[j]]; 144 } 145 } 146 ctx->work[idxs[nz-1]] -= sum; 147 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 148 } 149 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 150 reset_x = PETSC_TRUE; 151 } 152 if (transpose) { 153 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 154 } else { 155 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 156 } 157 if (reset_x) { 158 ierr = VecResetArray(x);CHKERRQ(ierr); 159 } 160 if (apply_left) { 161 PetscScalar *ay; 162 PetscInt i; 163 164 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 165 for (i=0;i<ctx->benign_n;i++) { 166 PetscScalar sum,val; 167 const PetscInt *idxs; 168 PetscInt nz,j; 169 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 170 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 171 val = -ay[idxs[nz-1]]; 172 if (ctx->apply_p0) { 173 sum = 0.; 174 for (j=0;j<nz-1;j++) { 175 sum += ay[idxs[j]]; 176 ay[idxs[j]] += val; 177 } 178 ay[idxs[nz-1]] += sum; 179 } else { 180 for (j=0;j<nz-1;j++) { 181 ay[idxs[j]] += val; 182 } 183 ay[idxs[nz-1]] = 0.; 184 } 185 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 186 } 187 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 194 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 195 { 196 PetscErrorCode ierr; 197 198 PetscFunctionBegin; 199 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 205 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 206 { 207 PetscErrorCode ierr; 208 209 PetscFunctionBegin; 210 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "PCBDDCBenignShellMat" 216 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 217 { 218 PC_IS *pcis = (PC_IS*)pc->data; 219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 220 PCBDDCBenignMatMult_ctx ctx; 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 if (!restore) { 225 Mat A_IB,A_BI; 226 PetscScalar *work; 227 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 228 229 if (pcbddc->benign_original_mat) { 230 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 231 } 232 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 233 PetscFunctionReturn(0); 234 } 235 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 237 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 238 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 239 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 240 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 241 ierr = PetscNew(&ctx);CHKERRQ(ierr); 242 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 243 ctx->apply_left = PETSC_TRUE; 244 ctx->apply_right = PETSC_FALSE; 245 ctx->apply_p0 = PETSC_FALSE; 246 ctx->benign_n = pcbddc->benign_n; 247 if (reuse) { 248 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 249 ctx->free = PETSC_FALSE; 250 } else { /* TODO: could be optimized for successive solves */ 251 ISLocalToGlobalMapping N_to_D; 252 PetscInt i; 253 254 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 255 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 256 for (i=0;i<pcbddc->benign_n;i++) { 257 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 258 } 259 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 260 ctx->free = PETSC_TRUE; 261 } 262 ctx->A = pcis->A_IB; 263 ctx->work = work; 264 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 265 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 266 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 267 pcis->A_IB = A_IB; 268 269 /* A_BI as A_IB^T */ 270 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 271 pcbddc->benign_original_mat = pcis->A_BI; 272 pcis->A_BI = A_BI; 273 } else { 274 if (!pcbddc->benign_original_mat) { 275 PetscFunctionReturn(0); 276 } 277 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 278 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 279 pcis->A_IB = ctx->A; 280 ctx->A = NULL; 281 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 282 pcis->A_BI = pcbddc->benign_original_mat; 283 pcbddc->benign_original_mat = NULL; 284 if (ctx->free) { 285 PetscInt i; 286 for (i=0;i<ctx->benign_n;i++) { 287 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 288 } 289 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 290 } 291 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 292 ierr = PetscFree(ctx);CHKERRQ(ierr); 293 } 294 PetscFunctionReturn(0); 295 } 296 297 /* used just in bddc debug mode */ 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCBenignProject" 300 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 301 { 302 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 303 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 304 Mat An; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 309 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 310 if (is1) { 311 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 312 ierr = MatDestroy(&An);CHKERRQ(ierr); 313 } else { 314 *B = An; 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /* TODO: add reuse flag */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSeqAIJCompress" 322 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 323 { 324 Mat Bt; 325 PetscScalar *a,*bdata; 326 const PetscInt *ii,*ij; 327 PetscInt m,n,i,nnz,*bii,*bij; 328 PetscBool flg_row; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 333 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 335 nnz = n; 336 for (i=0;i<ii[n];i++) { 337 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 338 } 339 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 340 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 341 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 342 nnz = 0; 343 bii[0] = 0; 344 for (i=0;i<n;i++) { 345 PetscInt j; 346 for (j=ii[i];j<ii[i+1];j++) { 347 PetscScalar entry = a[j]; 348 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 349 bij[nnz] = ij[j]; 350 bdata[nnz] = entry; 351 nnz++; 352 } 353 } 354 bii[i+1] = nnz; 355 } 356 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 357 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 358 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 359 { 360 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 361 b->free_a = PETSC_TRUE; 362 b->free_ij = PETSC_TRUE; 363 } 364 *B = Bt; 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatDetectDisconnectedComponents" 370 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 371 { 372 Mat B; 373 IS is_dummy,*cc_n; 374 ISLocalToGlobalMapping l2gmap_dummy; 375 PCBDDCGraph graph; 376 PetscInt i,n; 377 PetscInt *xadj,*adjncy; 378 PetscInt *xadj_filtered,*adjncy_filtered; 379 PetscBool flg_row,isseqaij; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 if (!A->rmap->N || !A->cmap->N) { 384 *ncc = 0; 385 *cc = NULL; 386 PetscFunctionReturn(0); 387 } 388 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 389 if (!isseqaij && filter) { 390 PetscBool isseqdense; 391 392 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 393 if (!isseqdense) { 394 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 395 } else { /* TODO: rectangular case and LDA */ 396 PetscScalar *array; 397 PetscReal chop=1.e-6; 398 399 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 400 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 401 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 402 for (i=0;i<n;i++) { 403 PetscInt j; 404 for (j=i+1;j<n;j++) { 405 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 406 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 407 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 408 } 409 } 410 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 411 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 412 } 413 } else { 414 B = A; 415 } 416 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 417 418 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 419 if (filter) { 420 PetscScalar *data; 421 PetscInt j,cum; 422 423 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 424 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 425 cum = 0; 426 for (i=0;i<n;i++) { 427 PetscInt t; 428 429 for (j=xadj[i];j<xadj[i+1];j++) { 430 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 431 continue; 432 } 433 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 434 } 435 t = xadj_filtered[i]; 436 xadj_filtered[i] = cum; 437 cum += t; 438 } 439 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 440 } else { 441 xadj_filtered = NULL; 442 adjncy_filtered = NULL; 443 } 444 445 /* compute local connected components using PCBDDCGraph */ 446 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 447 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 448 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 449 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 450 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 451 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 452 if (xadj_filtered) { 453 graph->xadj = xadj_filtered; 454 graph->adjncy = adjncy_filtered; 455 } else { 456 graph->xadj = xadj; 457 graph->adjncy = adjncy; 458 } 459 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 460 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 461 /* partial clean up */ 462 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 463 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 464 if (A != B) { 465 ierr = MatDestroy(&B);CHKERRQ(ierr); 466 } 467 468 /* get back data */ 469 if (ncc) *ncc = graph->ncc; 470 if (cc) { 471 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 472 for (i=0;i<graph->ncc;i++) { 473 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 474 } 475 *cc = cc_n; 476 } 477 /* clean up graph */ 478 graph->xadj = 0; 479 graph->adjncy = 0; 480 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCBenignCheck" 486 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 487 { 488 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 489 PC_IS* pcis = (PC_IS*)(pc->data); 490 IS dirIS = NULL; 491 PetscInt i; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 496 if (zerodiag) { 497 Mat A; 498 Vec vec3_N; 499 PetscScalar *vals; 500 const PetscInt *idxs; 501 PetscInt nz,*count; 502 503 /* p0 */ 504 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 505 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 507 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 508 for (i=0;i<nz;i++) vals[i] = 1.; 509 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 512 /* v_I */ 513 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 514 for (i=0;i<nz;i++) vals[i] = 0.; 515 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 517 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 519 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 520 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 521 if (dirIS) { 522 PetscInt n; 523 524 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 525 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 526 for (i=0;i<n;i++) vals[i] = 0.; 527 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 528 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 532 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 533 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 534 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 535 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 536 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 537 if (PetscAbsScalar(vals[0]) > 1.e-1) { 538 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 539 } 540 ierr = PetscFree(vals);CHKERRQ(ierr); 541 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 542 543 /* there should not be any pressure dofs lying on the interface */ 544 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 545 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 546 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 547 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 548 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 549 for (i=0;i<nz;i++) { 550 if (count[idxs[i]]) { 551 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 552 } 553 } 554 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 555 ierr = PetscFree(count);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 558 559 /* check PCBDDCBenignGetOrSetP0 */ 560 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 561 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 562 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 563 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 564 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 565 for (i=0;i<pcbddc->benign_n;i++) { 566 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 567 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 575 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 576 { 577 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 578 IS pressures,zerodiag,*zerodiag_subs; 579 PetscInt nz,n; 580 PetscInt *interior_dofs,n_interior_dofs; 581 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 582 PetscErrorCode ierr; 583 584 PetscFunctionBegin; 585 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 586 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 587 for (n=0;n<pcbddc->benign_n;n++) { 588 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 589 } 590 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 591 pcbddc->benign_n = 0; 592 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 593 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 594 Checks if all the pressure dofs in each subdomain have a zero diagonal 595 If not, a change of basis on pressures is not needed 596 since the local Schur complements are SPD 597 */ 598 has_null_pressures = PETSC_TRUE; 599 have_null = PETSC_TRUE; 600 if (pcbddc->n_ISForDofsLocal) { 601 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 602 603 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 604 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 605 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 607 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 608 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 609 if (!sorted) { 610 ierr = ISSort(pressures);CHKERRQ(ierr); 611 } 612 } else { 613 pressures = NULL; 614 } 615 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 616 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 617 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 618 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 619 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 620 if (!sorted) { 621 ierr = ISSort(zerodiag);CHKERRQ(ierr); 622 } 623 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 624 if (!nz) { 625 if (n) have_null = PETSC_FALSE; 626 has_null_pressures = PETSC_FALSE; 627 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 628 } 629 recompute_zerodiag = PETSC_FALSE; 630 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 631 zerodiag_subs = NULL; 632 pcbddc->benign_n = 0; 633 n_interior_dofs = 0; 634 interior_dofs = NULL; 635 if (pcbddc->current_level) { /* need to compute interior nodes */ 636 PetscInt n,i,j; 637 PetscInt n_neigh,*neigh,*n_shared,**shared; 638 PetscInt *iwork; 639 640 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 641 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 642 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 643 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 644 for (i=0;i<n_neigh;i++) 645 for (j=0;j<n_shared[i];j++) 646 iwork[shared[i][j]] += 1; 647 for (i=0;i<n;i++) 648 if (!iwork[i]) 649 interior_dofs[n_interior_dofs++] = i; 650 ierr = PetscFree(iwork);CHKERRQ(ierr); 651 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 652 } 653 if (has_null_pressures) { 654 IS *subs; 655 PetscInt nsubs,i,j,nl; 656 const PetscInt *idxs; 657 PetscScalar *array; 658 Vec *work; 659 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 660 661 subs = pcbddc->local_subs; 662 nsubs = pcbddc->n_local_subs; 663 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 664 if (pcbddc->current_level) { 665 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 666 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 667 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 668 /* work[0] = 1_p */ 669 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 670 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 671 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 672 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 673 /* work[0] = 1_v */ 674 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 675 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 676 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 677 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 679 } 680 if (nsubs > 1) { 681 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 682 for (i=0;i<nsubs;i++) { 683 ISLocalToGlobalMapping l2g; 684 IS t_zerodiag_subs; 685 PetscInt nl; 686 687 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 688 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 689 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 690 if (nl) { 691 PetscBool valid = PETSC_TRUE; 692 693 if (pcbddc->current_level) { 694 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 695 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 696 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 697 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 698 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 699 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 701 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 702 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 703 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 704 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 705 for (j=0;j<n_interior_dofs;j++) { 706 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 707 valid = PETSC_FALSE; 708 break; 709 } 710 } 711 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 712 } 713 if (valid && pcbddc->NeumannBoundariesLocal) { 714 IS t_bc; 715 PetscInt nzb; 716 717 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 718 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 719 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 720 if (nzb) valid = PETSC_FALSE; 721 } 722 if (valid && pressures) { 723 IS t_pressure_subs; 724 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 725 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 726 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 727 } 728 if (valid) { 729 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 730 pcbddc->benign_n++; 731 } else { 732 recompute_zerodiag = PETSC_TRUE; 733 } 734 } 735 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 736 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 737 } 738 } else { /* there's just one subdomain (or zero if they have not been detected */ 739 PetscBool valid = PETSC_TRUE; 740 741 if (pcbddc->NeumannBoundariesLocal) { 742 PetscInt nzb; 743 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 744 if (nzb) valid = PETSC_FALSE; 745 } 746 if (valid && pressures) { 747 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 748 } 749 if (valid && pcbddc->current_level) { 750 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 751 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 752 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 753 for (j=0;j<n_interior_dofs;j++) { 754 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 755 valid = PETSC_FALSE; 756 break; 757 } 758 } 759 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 760 } 761 if (valid) { 762 pcbddc->benign_n = 1; 763 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 764 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 765 zerodiag_subs[0] = zerodiag; 766 } 767 } 768 if (pcbddc->current_level) { 769 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 770 } 771 } 772 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 773 774 if (!pcbddc->benign_n) { 775 PetscInt n; 776 777 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 778 recompute_zerodiag = PETSC_FALSE; 779 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 780 if (n) { 781 has_null_pressures = PETSC_FALSE; 782 have_null = PETSC_FALSE; 783 } 784 } 785 786 /* final check for null pressures */ 787 if (zerodiag && pressures) { 788 PetscInt nz,np; 789 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 790 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 791 if (nz != np) have_null = PETSC_FALSE; 792 } 793 794 if (recompute_zerodiag) { 795 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 796 if (pcbddc->benign_n == 1) { 797 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 798 zerodiag = zerodiag_subs[0]; 799 } else { 800 PetscInt i,nzn,*new_idxs; 801 802 nzn = 0; 803 for (i=0;i<pcbddc->benign_n;i++) { 804 PetscInt ns; 805 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 806 nzn += ns; 807 } 808 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 809 nzn = 0; 810 for (i=0;i<pcbddc->benign_n;i++) { 811 PetscInt ns,*idxs; 812 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 813 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 814 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 815 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 816 nzn += ns; 817 } 818 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 819 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 820 } 821 have_null = PETSC_FALSE; 822 } 823 824 /* no-net-flux */ 825 if (pcbddc->benign_compute_nonetflux) { 826 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 827 MatNullSpace near_null_space; 828 ISLocalToGlobalMapping rmap; 829 Vec quad_vec; 830 PetscScalar *pvals; 831 PetscInt i,np,*dummyins; 832 IS isused = NULL; 833 PetscBool participate = PETSC_TRUE; 834 835 /* create vector to hold quadrature weights */ 836 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 837 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 838 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 839 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 840 841 /* compute B^{(i)T} * 1_p */ 842 np = 0; 843 if (pressures) { 844 isused = pressures; 845 } else { 846 isused = zerodiag; 847 } 848 if (isused) { 849 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 850 } 851 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 852 for (i=0;i<np;i++) pvals[i] = 1.; 853 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 854 if (np) { 855 const PetscInt *pidxs; 856 857 if (isused) { 858 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 859 } 860 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 861 if (isused) { 862 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 863 } 864 } 865 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 866 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 867 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 868 ierr = PetscFree(pvals);CHKERRQ(ierr); 869 if (!isused) participate = PETSC_FALSE; 870 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 871 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 872 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 873 for (i=0;i<matis->sf->nleaves;i++) 874 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 875 else matis->sf_leafdata[i] = -1; 876 877 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 878 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 879 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 880 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 881 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 882 for (i=0;i<matis->sf->nleaves;i++) { 883 if (PetscGlobalRank != matis->sf_leafdata[i]) { 884 pvals[i] = 0.; 885 } 886 } 887 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 888 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 889 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 890 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 891 ierr = PetscFree(dummyins);CHKERRQ(ierr); 892 893 /* assembly quadrature vec and attach near null space to pmat */ 894 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 895 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 896 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 897 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 898 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 899 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 900 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 901 } 902 903 /* change of basis and p0 dofs */ 904 if (has_null_pressures) { 905 IS zerodiagc; 906 const PetscInt *idxs,*idxsc; 907 PetscInt i,s,*nnz; 908 909 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 910 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 911 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 912 /* local change of basis for pressures */ 913 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 914 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 915 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 916 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 917 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 918 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 919 for (i=0;i<pcbddc->benign_n;i++) { 920 PetscInt nzs,j; 921 922 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 923 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 925 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 926 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 927 } 928 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 929 ierr = PetscFree(nnz);CHKERRQ(ierr); 930 /* set identity on velocities */ 931 for (i=0;i<n-nz;i++) { 932 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 933 } 934 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 935 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 936 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 937 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 938 /* set change on pressures */ 939 for (s=0;s<pcbddc->benign_n;s++) { 940 PetscScalar *array; 941 PetscInt nzs; 942 943 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 944 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 945 for (i=0;i<nzs-1;i++) { 946 PetscScalar vals[2]; 947 PetscInt cols[2]; 948 949 cols[0] = idxs[i]; 950 cols[1] = idxs[nzs-1]; 951 vals[0] = 1.; 952 vals[1] = 1.; 953 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 954 } 955 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 956 for (i=0;i<nzs-1;i++) array[i] = -1.; 957 array[nzs-1] = 1.; 958 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 959 /* store local idxs for p0 */ 960 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 961 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 962 ierr = PetscFree(array);CHKERRQ(ierr); 963 } 964 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 /* project if needed */ 967 if (pcbddc->benign_change_explicit) { 968 Mat M; 969 970 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 971 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 972 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 973 ierr = MatDestroy(&M);CHKERRQ(ierr); 974 } 975 /* store global idxs for p0 */ 976 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 977 } 978 pcbddc->benign_zerodiag_subs = zerodiag_subs; 979 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 980 981 /* determines if the coarse solver will be singular or not */ 982 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 983 /* determines if the problem has subdomains with 0 pressure block */ 984 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 985 *zerodiaglocal = zerodiag; 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 991 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 992 { 993 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 994 PetscScalar *array; 995 PetscErrorCode ierr; 996 997 PetscFunctionBegin; 998 if (!pcbddc->benign_sf) { 999 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1000 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1001 } 1002 if (get) { 1003 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1005 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1007 } else { 1008 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1009 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1010 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1018 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1019 { 1020 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 /* TODO: add error checking 1025 - avoid nested pop (or push) calls. 1026 - cannot push before pop. 1027 - cannot call this if pcbddc->local_mat is NULL 1028 */ 1029 if (!pcbddc->benign_n) { 1030 PetscFunctionReturn(0); 1031 } 1032 if (pop) { 1033 if (pcbddc->benign_change_explicit) { 1034 IS is_p0; 1035 MatReuse reuse; 1036 1037 /* extract B_0 */ 1038 reuse = MAT_INITIAL_MATRIX; 1039 if (pcbddc->benign_B0) { 1040 reuse = MAT_REUSE_MATRIX; 1041 } 1042 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1043 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1044 /* remove rows and cols from local problem */ 1045 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1046 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1047 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1048 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1049 } else { 1050 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1051 PetscScalar *vals; 1052 PetscInt i,n,*idxs_ins; 1053 1054 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1055 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1056 if (!pcbddc->benign_B0) { 1057 PetscInt *nnz; 1058 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1059 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1060 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1061 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1062 for (i=0;i<pcbddc->benign_n;i++) { 1063 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1064 nnz[i] = n - nnz[i]; 1065 } 1066 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1067 ierr = PetscFree(nnz);CHKERRQ(ierr); 1068 } 1069 1070 for (i=0;i<pcbddc->benign_n;i++) { 1071 PetscScalar *array; 1072 PetscInt *idxs,j,nz,cum; 1073 1074 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1075 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1076 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1077 for (j=0;j<nz;j++) vals[j] = 1.; 1078 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1079 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1080 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1081 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1082 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1083 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1084 cum = 0; 1085 for (j=0;j<n;j++) { 1086 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1087 vals[cum] = array[j]; 1088 idxs_ins[cum] = j; 1089 cum++; 1090 } 1091 } 1092 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1093 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1094 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1095 } 1096 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1097 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1098 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1099 } 1100 } else { /* push */ 1101 if (pcbddc->benign_change_explicit) { 1102 PetscInt i; 1103 1104 for (i=0;i<pcbddc->benign_n;i++) { 1105 PetscScalar *B0_vals; 1106 PetscInt *B0_cols,B0_ncol; 1107 1108 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1109 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1110 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1111 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1112 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1113 } 1114 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116 } else { 1117 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1125 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1126 { 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1129 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1130 PetscBLASInt *B_iwork,*B_ifail; 1131 PetscScalar *work,lwork; 1132 PetscScalar *St,*S,*eigv; 1133 PetscScalar *Sarray,*Starray; 1134 PetscReal *eigs,thresh; 1135 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1136 PetscBool allocated_S_St; 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscReal *rwork; 1139 #endif 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1144 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1145 1146 if (pcbddc->dbg_flag) { 1147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1151 } 1152 1153 if (pcbddc->dbg_flag) { 1154 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1155 } 1156 1157 /* max size of subsets */ 1158 mss = 0; 1159 for (i=0;i<sub_schurs->n_subs;i++) { 1160 PetscInt subset_size; 1161 1162 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1163 mss = PetscMax(mss,subset_size); 1164 } 1165 1166 /* min/max and threshold */ 1167 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1168 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1169 nmax = PetscMax(nmin,nmax); 1170 allocated_S_St = PETSC_FALSE; 1171 if (nmin) { 1172 allocated_S_St = PETSC_TRUE; 1173 } 1174 1175 /* allocate lapack workspace */ 1176 cum = cum2 = 0; 1177 maxneigs = 0; 1178 for (i=0;i<sub_schurs->n_subs;i++) { 1179 PetscInt n,subset_size; 1180 1181 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1182 n = PetscMin(subset_size,nmax); 1183 cum += subset_size; 1184 cum2 += subset_size*n; 1185 maxneigs = PetscMax(maxneigs,n); 1186 } 1187 if (mss) { 1188 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1189 PetscBLASInt B_itype = 1; 1190 PetscBLASInt B_N = mss; 1191 PetscReal zero = 0.0; 1192 PetscReal eps = 0.0; /* dlamch? */ 1193 1194 B_lwork = -1; 1195 S = NULL; 1196 St = NULL; 1197 eigs = NULL; 1198 eigv = NULL; 1199 B_iwork = NULL; 1200 B_ifail = NULL; 1201 #if defined(PETSC_USE_COMPLEX) 1202 rwork = NULL; 1203 #endif 1204 thresh = 1.0; 1205 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1206 #if defined(PETSC_USE_COMPLEX) 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1208 #else 1209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1210 #endif 1211 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1212 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1213 } else { 1214 /* TODO */ 1215 } 1216 } else { 1217 lwork = 0; 1218 } 1219 1220 nv = 0; 1221 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1222 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1223 } 1224 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1225 if (allocated_S_St) { 1226 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1227 } 1228 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1229 #if defined(PETSC_USE_COMPLEX) 1230 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1231 #endif 1232 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1233 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1234 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1235 nv+cum,&pcbddc->adaptive_constraints_idxs, 1236 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1237 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1238 1239 maxneigs = 0; 1240 cum = cumarray = 0; 1241 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1242 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1243 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1244 const PetscInt *idxs; 1245 1246 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1247 for (cum=0;cum<nv;cum++) { 1248 pcbddc->adaptive_constraints_n[cum] = 1; 1249 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1250 pcbddc->adaptive_constraints_data[cum] = 1.0; 1251 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1252 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1253 } 1254 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1255 } 1256 1257 if (mss) { /* multilevel */ 1258 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1259 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1260 } 1261 1262 thresh = pcbddc->adaptive_threshold; 1263 for (i=0;i<sub_schurs->n_subs;i++) { 1264 const PetscInt *idxs; 1265 PetscReal upper,lower; 1266 PetscInt j,subset_size,eigs_start = 0; 1267 PetscBLASInt B_N; 1268 PetscBool same_data = PETSC_FALSE; 1269 1270 if (pcbddc->use_deluxe_scaling) { 1271 upper = PETSC_MAX_REAL; 1272 lower = thresh; 1273 } else { 1274 upper = 1./thresh; 1275 lower = 0.; 1276 } 1277 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1278 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1279 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1280 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1281 if (sub_schurs->is_hermitian) { 1282 PetscInt j,k; 1283 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1284 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1285 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1286 } 1287 for (j=0;j<subset_size;j++) { 1288 for (k=j;k<subset_size;k++) { 1289 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1290 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1291 } 1292 } 1293 } else { 1294 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1295 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1296 } 1297 } else { 1298 S = Sarray + cumarray; 1299 St = Starray + cumarray; 1300 } 1301 /* see if we can save some work */ 1302 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1303 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1304 } 1305 1306 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1307 B_neigs = 0; 1308 } else { 1309 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1310 PetscBLASInt B_itype = 1; 1311 PetscBLASInt B_IL, B_IU; 1312 PetscReal eps = -1.0; /* dlamch? */ 1313 PetscInt nmin_s; 1314 PetscBool compute_range = PETSC_FALSE; 1315 1316 if (pcbddc->dbg_flag) { 1317 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1318 } 1319 1320 compute_range = PETSC_FALSE; 1321 if (thresh > 1.+PETSC_SMALL && !same_data) { 1322 compute_range = PETSC_TRUE; 1323 } 1324 1325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1326 if (compute_range) { 1327 1328 /* ask for eigenvalues larger than thresh */ 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1331 #else 1332 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1333 #endif 1334 } else if (!same_data) { 1335 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1336 B_IL = 1; 1337 #if defined(PETSC_USE_COMPLEX) 1338 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1339 #else 1340 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1341 #endif 1342 } else { /* same_data is true, so get the adaptive function requested by the user */ 1343 PetscInt k; 1344 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1345 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1346 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1347 nmin = nmax; 1348 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1349 for (k=0;k<nmax;k++) { 1350 eigs[k] = 1./PETSC_SMALL; 1351 eigv[k*(subset_size+1)] = 1.0; 1352 } 1353 } 1354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1355 if (B_ierr) { 1356 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1357 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1358 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1359 } 1360 1361 if (B_neigs > nmax) { 1362 if (pcbddc->dbg_flag) { 1363 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1364 } 1365 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1366 B_neigs = nmax; 1367 } 1368 1369 nmin_s = PetscMin(nmin,B_N); 1370 if (B_neigs < nmin_s) { 1371 PetscBLASInt B_neigs2; 1372 1373 if (pcbddc->use_deluxe_scaling) { 1374 B_IL = B_N - nmin_s + 1; 1375 B_IU = B_N - B_neigs; 1376 } else { 1377 B_IL = B_neigs + 1; 1378 B_IU = nmin_s; 1379 } 1380 if (pcbddc->dbg_flag) { 1381 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1382 } 1383 if (sub_schurs->is_hermitian) { 1384 PetscInt j; 1385 for (j=0;j<subset_size;j++) { 1386 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1387 } 1388 for (j=0;j<subset_size;j++) { 1389 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1390 } 1391 } else { 1392 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1393 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 } 1395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1396 #if defined(PETSC_USE_COMPLEX) 1397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1398 #else 1399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1400 #endif 1401 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1402 B_neigs += B_neigs2; 1403 } 1404 if (B_ierr) { 1405 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1406 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1407 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1408 } 1409 if (pcbddc->dbg_flag) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1411 for (j=0;j<B_neigs;j++) { 1412 if (eigs[j] == 0.0) { 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1414 } else { 1415 if (pcbddc->use_deluxe_scaling) { 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1419 } 1420 } 1421 } 1422 } 1423 } else { 1424 /* TODO */ 1425 } 1426 } 1427 /* change the basis back to the original one */ 1428 if (sub_schurs->change) { 1429 Mat change,phi,phit; 1430 1431 if (pcbddc->dbg_flag > 1) { 1432 PetscInt ii; 1433 for (ii=0;ii<B_neigs;ii++) { 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1435 for (j=0;j<B_N;j++) { 1436 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1437 } 1438 } 1439 } 1440 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1442 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1443 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1444 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1445 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1446 } 1447 maxneigs = PetscMax(B_neigs,maxneigs); 1448 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1449 if (B_neigs) { 1450 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1451 1452 if (pcbddc->dbg_flag > 1) { 1453 PetscInt ii; 1454 for (ii=0;ii<B_neigs;ii++) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1456 for (j=0;j<B_N;j++) { 1457 #if defined(PETSC_USE_COMPLEX) 1458 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1459 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1461 #else 1462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1463 #endif 1464 } 1465 } 1466 } 1467 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1468 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1469 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1470 cum++; 1471 } 1472 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1473 /* shift for next computation */ 1474 cumarray += subset_size*subset_size; 1475 } 1476 if (pcbddc->dbg_flag) { 1477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1478 } 1479 1480 if (mss) { 1481 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1482 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1483 /* destroy matrices (junk) */ 1484 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1485 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1486 } 1487 if (allocated_S_St) { 1488 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1489 } 1490 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1491 #if defined(PETSC_USE_COMPLEX) 1492 ierr = PetscFree(rwork);CHKERRQ(ierr); 1493 #endif 1494 if (pcbddc->dbg_flag) { 1495 PetscInt maxneigs_r; 1496 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1497 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "PCBDDCSetUpSolvers" 1504 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1505 { 1506 PetscScalar *coarse_submat_vals; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1511 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1512 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1513 1514 /* Setup local neumann solver ksp_R */ 1515 /* PCBDDCSetUpLocalScatters should be called first! */ 1516 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1517 1518 /* 1519 Setup local correction and local part of coarse basis. 1520 Gives back the dense local part of the coarse matrix in column major ordering 1521 */ 1522 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1523 1524 /* Compute total number of coarse nodes and setup coarse solver */ 1525 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1526 1527 /* free */ 1528 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCResetCustomization" 1534 PetscErrorCode PCBDDCResetCustomization(PC pc) 1535 { 1536 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1541 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1542 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1543 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1544 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1545 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1546 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1547 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1548 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1549 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1550 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "PCBDDCResetTopography" 1556 PetscErrorCode PCBDDCResetTopography(PC pc) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscInt i; 1560 PetscErrorCode ierr; 1561 1562 PetscFunctionBegin; 1563 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1564 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1565 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1566 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1567 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1568 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1569 for (i=0;i<pcbddc->n_local_subs;i++) { 1570 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1571 } 1572 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1573 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "PCBDDCResetSolvers" 1579 PetscErrorCode PCBDDCResetSolvers(PC pc) 1580 { 1581 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1586 if (pcbddc->coarse_phi_B) { 1587 PetscScalar *array; 1588 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1589 ierr = PetscFree(array);CHKERRQ(ierr); 1590 } 1591 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1592 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1593 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1595 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1596 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1597 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1600 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1601 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1602 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1603 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1604 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1605 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1606 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1607 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1610 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1611 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1612 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1613 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1614 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1615 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1616 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1618 if (pcbddc->benign_zerodiag_subs) { 1619 PetscInt i; 1620 for (i=0;i<pcbddc->benign_n;i++) { 1621 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1622 } 1623 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1624 } 1625 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1631 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1632 { 1633 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1634 PC_IS *pcis = (PC_IS*)pc->data; 1635 VecType impVecType; 1636 PetscInt n_constraints,n_R,old_size; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1641 /* get sizes */ 1642 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1643 n_R = pcis->n - pcbddc->n_vertices; 1644 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1645 /* local work vectors (try to avoid unneeded work)*/ 1646 /* R nodes */ 1647 old_size = -1; 1648 if (pcbddc->vec1_R) { 1649 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1650 } 1651 if (n_R != old_size) { 1652 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1653 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1654 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1655 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1656 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1657 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 /* local primal dofs */ 1660 old_size = -1; 1661 if (pcbddc->vec1_P) { 1662 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1663 } 1664 if (pcbddc->local_primal_size != old_size) { 1665 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1666 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1667 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1668 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1669 } 1670 /* local explicit constraints */ 1671 old_size = -1; 1672 if (pcbddc->vec1_C) { 1673 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1674 } 1675 if (n_constraints && n_constraints != old_size) { 1676 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1677 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1678 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1679 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "PCBDDCSetUpCorrection" 1686 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1687 { 1688 PetscErrorCode ierr; 1689 /* pointers to pcis and pcbddc */ 1690 PC_IS* pcis = (PC_IS*)pc->data; 1691 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1692 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1693 /* submatrices of local problem */ 1694 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1695 /* submatrices of local coarse problem */ 1696 Mat S_VV,S_CV,S_VC,S_CC; 1697 /* working matrices */ 1698 Mat C_CR; 1699 /* additional working stuff */ 1700 PC pc_R; 1701 Mat F; 1702 Vec dummy_vec; 1703 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1704 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1705 PetscScalar *work; 1706 PetscInt *idx_V_B; 1707 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1708 PetscInt i,n_R,n_D,n_B; 1709 1710 /* some shortcuts to scalars */ 1711 PetscScalar one=1.0,m_one=-1.0; 1712 1713 PetscFunctionBegin; 1714 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1716 } 1717 1718 /* Set Non-overlapping dimensions */ 1719 n_vertices = pcbddc->n_vertices; 1720 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1721 n_B = pcis->n_B; 1722 n_D = pcis->n - n_B; 1723 n_R = pcis->n - n_vertices; 1724 1725 /* vertices in boundary numbering */ 1726 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1727 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1728 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1729 1730 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1731 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1732 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1733 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1735 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1737 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1739 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1740 1741 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1742 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1743 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1744 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1745 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1746 lda_rhs = n_R; 1747 need_benign_correction = PETSC_FALSE; 1748 if (isLU || isILU || isCHOL) { 1749 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1750 } else if (sub_schurs->reuse_solver) { 1751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1752 MatFactorType type; 1753 1754 F = reuse_solver->F; 1755 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1756 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1757 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1758 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1759 } else { 1760 F = NULL; 1761 } 1762 1763 /* allocate workspace */ 1764 n = 0; 1765 if (n_constraints) { 1766 n += lda_rhs*n_constraints; 1767 } 1768 if (n_vertices) { 1769 n = PetscMax(2*lda_rhs*n_vertices,n); 1770 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1771 } 1772 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1773 1774 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1775 dummy_vec = NULL; 1776 if (need_benign_correction && lda_rhs != n_R && F) { 1777 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1778 } 1779 1780 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1781 if (n_constraints) { 1782 Mat M1,M2,M3,C_B; 1783 IS is_aux; 1784 PetscScalar *array,*array2; 1785 1786 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1787 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1788 1789 /* Extract constraints on R nodes: C_{CR} */ 1790 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1791 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1792 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1793 1794 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1795 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1796 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1797 for (i=0;i<n_constraints;i++) { 1798 const PetscScalar *row_cmat_values; 1799 const PetscInt *row_cmat_indices; 1800 PetscInt size_of_constraint,j; 1801 1802 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1803 for (j=0;j<size_of_constraint;j++) { 1804 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1805 } 1806 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1807 } 1808 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1809 if (F) { 1810 Mat B; 1811 1812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1815 1816 /* rhs is already zero on interior dofs, no need to change the rhs */ 1817 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1818 } 1819 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1820 if (need_benign_correction) { 1821 PetscScalar *marr; 1822 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1823 1824 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1825 if (lda_rhs != n_R) { 1826 for (i=0;i<n_constraints;i++) { 1827 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1828 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1829 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1830 } 1831 } else { 1832 for (i=0;i<n_constraints;i++) { 1833 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1834 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1835 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1836 } 1837 } 1838 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1839 } 1840 ierr = MatDestroy(&B);CHKERRQ(ierr); 1841 } else { 1842 PetscScalar *marr; 1843 1844 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1845 for (i=0;i<n_constraints;i++) { 1846 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1847 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1848 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1849 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1850 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1851 } 1852 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1853 } 1854 if (!pcbddc->switch_static) { 1855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1856 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1857 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1858 for (i=0;i<n_constraints;i++) { 1859 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1860 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1864 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1865 } 1866 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1867 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1868 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1869 } else { 1870 if (lda_rhs != n_R) { 1871 IS dummy; 1872 1873 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1874 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1875 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1876 } else { 1877 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1878 pcbddc->local_auxmat2 = local_auxmat2_R; 1879 } 1880 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1881 } 1882 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1883 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1884 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1885 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1886 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1887 if (isCHOL) { 1888 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1891 } 1892 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1893 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1894 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1895 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1896 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1897 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1898 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1900 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1901 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1902 } 1903 1904 /* Get submatrices from subdomain matrix */ 1905 if (n_vertices) { 1906 IS is_aux; 1907 1908 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1909 IS tis; 1910 1911 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1912 ierr = ISSort(tis);CHKERRQ(ierr); 1913 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1914 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1915 } else { 1916 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1917 } 1918 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1919 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1920 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1921 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1922 } 1923 1924 /* Matrix of coarse basis functions (local) */ 1925 if (pcbddc->coarse_phi_B) { 1926 PetscInt on_B,on_primal,on_D=n_D; 1927 if (pcbddc->coarse_phi_D) { 1928 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1929 } 1930 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1931 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1932 PetscScalar *marray; 1933 1934 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1935 ierr = PetscFree(marray);CHKERRQ(ierr); 1936 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1937 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1938 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1939 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1940 } 1941 } 1942 1943 if (!pcbddc->coarse_phi_B) { 1944 PetscScalar *marray; 1945 1946 n = n_B*pcbddc->local_primal_size; 1947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1948 n += n_D*pcbddc->local_primal_size; 1949 } 1950 if (!pcbddc->symmetric_primal) { 1951 n *= 2; 1952 } 1953 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1955 n = n_B*pcbddc->local_primal_size; 1956 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1958 n += n_D*pcbddc->local_primal_size; 1959 } 1960 if (!pcbddc->symmetric_primal) { 1961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1962 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1963 n = n_B*pcbddc->local_primal_size; 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1965 } 1966 } else { 1967 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1968 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1969 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1970 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1971 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1972 } 1973 } 1974 } 1975 1976 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1977 p0_lidx_I = NULL; 1978 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1979 const PetscInt *idxs; 1980 1981 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1982 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1983 for (i=0;i<pcbddc->benign_n;i++) { 1984 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1985 } 1986 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1987 } 1988 1989 /* vertices */ 1990 if (n_vertices) { 1991 1992 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1993 1994 if (n_R) { 1995 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1996 PetscBLASInt B_N,B_one = 1; 1997 PetscScalar *x,*y; 1998 PetscBool isseqaij; 1999 2000 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2001 if (need_benign_correction) { 2002 ISLocalToGlobalMapping RtoN; 2003 IS is_p0; 2004 PetscInt *idxs_p0,n; 2005 2006 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2007 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2008 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2009 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2010 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2012 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2013 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2014 } 2015 2016 if (lda_rhs == n_R) { 2017 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2018 } else { 2019 PetscScalar *av,*array; 2020 const PetscInt *xadj,*adjncy; 2021 PetscInt n; 2022 PetscBool flg_row; 2023 2024 array = work+lda_rhs*n_vertices; 2025 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2026 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2027 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2028 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2029 for (i=0;i<n;i++) { 2030 PetscInt j; 2031 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2032 } 2033 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2034 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2035 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2036 } 2037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2038 if (need_benign_correction) { 2039 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2040 PetscScalar *marr; 2041 2042 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2043 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2044 2045 | 0 0 0 | (V) 2046 L = | 0 0 -1 | (P-p0) 2047 | 0 0 -1 | (p0) 2048 2049 */ 2050 for (i=0;i<reuse_solver->benign_n;i++) { 2051 const PetscScalar *vals; 2052 const PetscInt *idxs,*idxs_zero; 2053 PetscInt n,j,nz; 2054 2055 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2056 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2057 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2058 for (j=0;j<n;j++) { 2059 PetscScalar val = vals[j]; 2060 PetscInt k,col = idxs[j]; 2061 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2062 } 2063 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2064 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2065 } 2066 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2067 } 2068 if (F) { 2069 /* need to correct the rhs */ 2070 if (need_benign_correction) { 2071 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2072 PetscScalar *marr; 2073 2074 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2075 if (lda_rhs != n_R) { 2076 for (i=0;i<n_vertices;i++) { 2077 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2078 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2079 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2080 } 2081 } else { 2082 for (i=0;i<n_vertices;i++) { 2083 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2084 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2089 } 2090 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2091 /* need to correct the solution */ 2092 if (need_benign_correction) { 2093 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2094 PetscScalar *marr; 2095 2096 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2097 if (lda_rhs != n_R) { 2098 for (i=0;i<n_vertices;i++) { 2099 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2100 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2101 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2102 } 2103 } else { 2104 for (i=0;i<n_vertices;i++) { 2105 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2106 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2107 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2108 } 2109 } 2110 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2111 } 2112 } else { 2113 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2114 for (i=0;i<n_vertices;i++) { 2115 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2116 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2117 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2118 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2119 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2120 } 2121 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2122 } 2123 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2124 /* S_VV and S_CV */ 2125 if (n_constraints) { 2126 Mat B; 2127 2128 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2129 for (i=0;i<n_vertices;i++) { 2130 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2131 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2132 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2134 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2136 } 2137 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2138 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2139 ierr = MatDestroy(&B);CHKERRQ(ierr); 2140 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2141 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2142 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2144 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2145 ierr = MatDestroy(&B);CHKERRQ(ierr); 2146 } 2147 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2148 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2149 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2150 } 2151 if (lda_rhs != n_R) { 2152 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2154 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2155 } 2156 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2157 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2158 if (need_benign_correction) { 2159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2160 PetscScalar *marr,*sums; 2161 2162 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2163 ierr = MatDenseGetArray(S_VVt,&marr); 2164 for (i=0;i<reuse_solver->benign_n;i++) { 2165 const PetscScalar *vals; 2166 const PetscInt *idxs,*idxs_zero; 2167 PetscInt n,j,nz; 2168 2169 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2170 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2171 for (j=0;j<n_vertices;j++) { 2172 PetscInt k; 2173 sums[j] = 0.; 2174 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2175 } 2176 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2177 for (j=0;j<n;j++) { 2178 PetscScalar val = vals[j]; 2179 PetscInt k; 2180 for (k=0;k<n_vertices;k++) { 2181 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2182 } 2183 } 2184 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2185 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(sums);CHKERRQ(ierr); 2188 ierr = MatDenseRestoreArray(S_VVt,&marr); 2189 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2190 } 2191 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2193 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2195 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2196 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2197 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2198 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2199 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2200 } else { 2201 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2202 } 2203 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2204 2205 /* coarse basis functions */ 2206 for (i=0;i<n_vertices;i++) { 2207 PetscScalar *y; 2208 2209 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2210 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2211 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2212 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2213 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2214 y[n_B*i+idx_V_B[i]] = 1.0; 2215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2216 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2217 2218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2219 PetscInt j; 2220 2221 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2222 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2223 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2224 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2225 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2226 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2227 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2228 } 2229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2230 } 2231 /* if n_R == 0 the object is not destroyed */ 2232 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2233 } 2234 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2235 2236 if (n_constraints) { 2237 Mat B; 2238 2239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2240 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2241 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2242 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2243 if (n_vertices) { 2244 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2245 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2246 } else { 2247 Mat S_VCt; 2248 2249 if (lda_rhs != n_R) { 2250 ierr = MatDestroy(&B);CHKERRQ(ierr); 2251 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2252 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2253 } 2254 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2255 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2256 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2257 } 2258 } 2259 ierr = MatDestroy(&B);CHKERRQ(ierr); 2260 /* coarse basis functions */ 2261 for (i=0;i<n_constraints;i++) { 2262 PetscScalar *y; 2263 2264 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2265 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2266 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2267 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2268 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2269 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2270 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2271 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2272 PetscInt j; 2273 2274 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2275 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2279 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2280 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2281 } 2282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2283 } 2284 } 2285 if (n_constraints) { 2286 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2287 } 2288 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2289 2290 /* coarse matrix entries relative to B_0 */ 2291 if (pcbddc->benign_n) { 2292 Mat B0_B,B0_BPHI; 2293 IS is_dummy; 2294 PetscScalar *data; 2295 PetscInt j; 2296 2297 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2299 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2300 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2301 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2303 for (j=0;j<pcbddc->benign_n;j++) { 2304 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2305 for (i=0;i<pcbddc->local_primal_size;i++) { 2306 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2307 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2308 } 2309 } 2310 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2311 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2313 } 2314 2315 /* compute other basis functions for non-symmetric problems */ 2316 if (!pcbddc->symmetric_primal) { 2317 Mat B_V=NULL,B_C=NULL; 2318 PetscScalar *marray; 2319 2320 if (n_constraints) { 2321 Mat S_CCT,C_CRT; 2322 2323 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2324 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2325 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2326 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2327 if (n_vertices) { 2328 Mat S_VCT; 2329 2330 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2331 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2332 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2333 } 2334 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2335 } else { 2336 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2337 } 2338 if (n_vertices && n_R) { 2339 PetscScalar *av,*marray; 2340 const PetscInt *xadj,*adjncy; 2341 PetscInt n; 2342 PetscBool flg_row; 2343 2344 /* B_V = B_V - A_VR^T */ 2345 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2346 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2347 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2348 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2349 for (i=0;i<n;i++) { 2350 PetscInt j; 2351 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2352 } 2353 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2354 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2355 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2356 } 2357 2358 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2359 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2360 for (i=0;i<n_vertices;i++) { 2361 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2362 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2363 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2364 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2365 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2366 } 2367 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2368 if (B_C) { 2369 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2370 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2371 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2372 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2373 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2374 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2375 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2376 } 2377 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2378 } 2379 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2380 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2381 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2382 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2383 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2384 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2385 } 2386 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2387 /* coarse basis functions */ 2388 for (i=0;i<pcbddc->local_primal_size;i++) { 2389 PetscScalar *y; 2390 2391 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2392 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2393 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2396 if (i<n_vertices) { 2397 y[n_B*i+idx_V_B[i]] = 1.0; 2398 } 2399 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2400 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2401 2402 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2403 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2404 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2407 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2408 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2409 } 2410 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2411 } 2412 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2413 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2414 } 2415 /* free memory */ 2416 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2417 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2418 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2419 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2420 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2421 ierr = PetscFree(work);CHKERRQ(ierr); 2422 if (n_vertices) { 2423 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2424 } 2425 if (n_constraints) { 2426 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2427 } 2428 /* Checking coarse_sub_mat and coarse basis functios */ 2429 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2430 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2431 if (pcbddc->dbg_flag) { 2432 Mat coarse_sub_mat; 2433 Mat AUXMAT,TM1,TM2,TM3,TM4; 2434 Mat coarse_phi_D,coarse_phi_B; 2435 Mat coarse_psi_D,coarse_psi_B; 2436 Mat A_II,A_BB,A_IB,A_BI; 2437 Mat C_B,CPHI; 2438 IS is_dummy; 2439 Vec mones; 2440 MatType checkmattype=MATSEQAIJ; 2441 PetscReal real_value; 2442 2443 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2444 Mat A; 2445 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2446 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2447 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2448 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2449 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2450 ierr = MatDestroy(&A);CHKERRQ(ierr); 2451 } else { 2452 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2453 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2454 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2455 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2456 } 2457 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2458 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2459 if (!pcbddc->symmetric_primal) { 2460 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2461 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2462 } 2463 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2464 2465 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2466 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2467 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2468 if (!pcbddc->symmetric_primal) { 2469 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2470 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2471 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2472 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2473 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2475 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2476 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2477 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2478 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2479 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2480 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2481 } else { 2482 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2483 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2484 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2485 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2486 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2487 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2488 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2489 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2490 } 2491 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2492 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2493 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2494 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2495 if (pcbddc->benign_n) { 2496 Mat B0_B,B0_BPHI; 2497 PetscScalar *data,*data2; 2498 PetscInt j; 2499 2500 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2501 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2502 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2503 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2504 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2505 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2506 for (j=0;j<pcbddc->benign_n;j++) { 2507 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2508 for (i=0;i<pcbddc->local_primal_size;i++) { 2509 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2510 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2511 } 2512 } 2513 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2514 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2515 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2516 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2517 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2518 } 2519 #if 0 2520 { 2521 PetscViewer viewer; 2522 char filename[256]; 2523 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2524 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2525 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2526 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2527 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2528 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2529 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2530 if (save_change) { 2531 Mat phi_B; 2532 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2533 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2534 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2535 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2536 } else { 2537 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2538 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2539 } 2540 if (pcbddc->coarse_phi_D) { 2541 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2542 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2543 } 2544 if (pcbddc->coarse_psi_B) { 2545 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2546 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2547 } 2548 if (pcbddc->coarse_psi_D) { 2549 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2550 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2551 } 2552 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2553 } 2554 #endif 2555 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2556 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2557 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2559 2560 /* check constraints */ 2561 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2562 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2563 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2564 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2565 } else { 2566 PetscScalar *data; 2567 Mat tmat; 2568 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2569 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2570 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2571 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2572 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2573 } 2574 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2575 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2576 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2577 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2579 if (!pcbddc->symmetric_primal) { 2580 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2581 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2582 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2583 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2585 } 2586 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2587 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2589 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2590 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2591 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2592 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2593 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2594 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2595 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2596 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2597 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2598 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2599 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2600 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2601 if (!pcbddc->symmetric_primal) { 2602 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2603 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2604 } 2605 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2606 } 2607 /* get back data */ 2608 *coarse_submat_vals_n = coarse_submat_vals; 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2614 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2615 { 2616 Mat *work_mat; 2617 IS isrow_s,iscol_s; 2618 PetscBool rsorted,csorted; 2619 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2620 PetscErrorCode ierr; 2621 2622 PetscFunctionBegin; 2623 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2624 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2625 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2626 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2627 2628 if (!rsorted) { 2629 const PetscInt *idxs; 2630 PetscInt *idxs_sorted,i; 2631 2632 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2633 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2634 for (i=0;i<rsize;i++) { 2635 idxs_perm_r[i] = i; 2636 } 2637 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2638 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2639 for (i=0;i<rsize;i++) { 2640 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2641 } 2642 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2644 } else { 2645 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2646 isrow_s = isrow; 2647 } 2648 2649 if (!csorted) { 2650 if (isrow == iscol) { 2651 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2652 iscol_s = isrow_s; 2653 } else { 2654 const PetscInt *idxs; 2655 PetscInt *idxs_sorted,i; 2656 2657 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2658 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2659 for (i=0;i<csize;i++) { 2660 idxs_perm_c[i] = i; 2661 } 2662 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2663 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2664 for (i=0;i<csize;i++) { 2665 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2666 } 2667 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2668 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2669 } 2670 } else { 2671 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2672 iscol_s = iscol; 2673 } 2674 2675 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2676 2677 if (!rsorted || !csorted) { 2678 Mat new_mat; 2679 IS is_perm_r,is_perm_c; 2680 2681 if (!rsorted) { 2682 PetscInt *idxs_r,i; 2683 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2684 for (i=0;i<rsize;i++) { 2685 idxs_r[idxs_perm_r[i]] = i; 2686 } 2687 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2688 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2689 } else { 2690 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2691 } 2692 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2693 2694 if (!csorted) { 2695 if (isrow_s == iscol_s) { 2696 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2697 is_perm_c = is_perm_r; 2698 } else { 2699 PetscInt *idxs_c,i; 2700 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2701 for (i=0;i<csize;i++) { 2702 idxs_c[idxs_perm_c[i]] = i; 2703 } 2704 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2705 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2706 } 2707 } else { 2708 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2709 } 2710 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2711 2712 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2713 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2714 work_mat[0] = new_mat; 2715 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2716 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2717 } 2718 2719 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2720 *B = work_mat[0]; 2721 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2722 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2723 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2729 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2730 { 2731 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2732 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2733 Mat new_mat; 2734 IS is_local,is_global; 2735 PetscInt local_size; 2736 PetscBool isseqaij; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2741 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2742 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2743 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2744 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2745 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2746 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2747 2748 /* check */ 2749 if (pcbddc->dbg_flag) { 2750 Vec x,x_change; 2751 PetscReal error; 2752 2753 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2754 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2755 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2756 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2757 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2758 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2759 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2760 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2761 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2762 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2765 ierr = VecDestroy(&x);CHKERRQ(ierr); 2766 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2767 } 2768 2769 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2771 if (isseqaij) { 2772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2773 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2774 } else { 2775 Mat work_mat; 2776 2777 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2778 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2779 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2780 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2781 } 2782 if (matis->A->symmetric_set) { 2783 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2784 #if !defined(PETSC_USE_COMPLEX) 2785 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2786 #endif 2787 } 2788 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2794 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2795 { 2796 PC_IS* pcis = (PC_IS*)(pc->data); 2797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2798 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2799 PetscInt *idx_R_local=NULL; 2800 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2801 PetscInt vbs,bs; 2802 PetscBT bitmask=NULL; 2803 PetscErrorCode ierr; 2804 2805 PetscFunctionBegin; 2806 /* 2807 No need to setup local scatters if 2808 - primal space is unchanged 2809 AND 2810 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2811 AND 2812 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2813 */ 2814 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2815 PetscFunctionReturn(0); 2816 } 2817 /* destroy old objects */ 2818 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2819 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2820 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2821 /* Set Non-overlapping dimensions */ 2822 n_B = pcis->n_B; 2823 n_D = pcis->n - n_B; 2824 n_vertices = pcbddc->n_vertices; 2825 2826 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2827 2828 /* create auxiliary bitmask and allocate workspace */ 2829 if (!sub_schurs->reuse_solver) { 2830 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2832 for (i=0;i<n_vertices;i++) { 2833 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2834 } 2835 2836 for (i=0, n_R=0; i<pcis->n; i++) { 2837 if (!PetscBTLookup(bitmask,i)) { 2838 idx_R_local[n_R++] = i; 2839 } 2840 } 2841 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2842 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2843 2844 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2845 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2846 } 2847 2848 /* Block code */ 2849 vbs = 1; 2850 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2851 if (bs>1 && !(n_vertices%bs)) { 2852 PetscBool is_blocked = PETSC_TRUE; 2853 PetscInt *vary; 2854 if (!sub_schurs->reuse_solver) { 2855 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2856 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2857 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2858 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2859 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2860 for (i=0; i<pcis->n/bs; i++) { 2861 if (vary[i]!=0 && vary[i]!=bs) { 2862 is_blocked = PETSC_FALSE; 2863 break; 2864 } 2865 } 2866 ierr = PetscFree(vary);CHKERRQ(ierr); 2867 } else { 2868 /* Verify directly the R set */ 2869 for (i=0; i<n_R/bs; i++) { 2870 PetscInt j,node=idx_R_local[bs*i]; 2871 for (j=1; j<bs; j++) { 2872 if (node != idx_R_local[bs*i+j]-j) { 2873 is_blocked = PETSC_FALSE; 2874 break; 2875 } 2876 } 2877 } 2878 } 2879 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2880 vbs = bs; 2881 for (i=0;i<n_R/vbs;i++) { 2882 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2883 } 2884 } 2885 } 2886 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2887 if (sub_schurs->reuse_solver) { 2888 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2889 2890 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2891 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2892 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2893 reuse_solver->is_R = pcbddc->is_R_local; 2894 } else { 2895 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2896 } 2897 2898 /* print some info if requested */ 2899 if (pcbddc->dbg_flag) { 2900 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2902 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2905 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2907 } 2908 2909 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2910 if (!sub_schurs->reuse_solver) { 2911 IS is_aux1,is_aux2; 2912 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2913 2914 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2915 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2916 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2917 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2918 for (i=0; i<n_D; i++) { 2919 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2920 } 2921 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2922 for (i=0, j=0; i<n_R; i++) { 2923 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2924 aux_array1[j++] = i; 2925 } 2926 } 2927 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2928 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2929 for (i=0, j=0; i<n_B; i++) { 2930 if (!PetscBTLookup(bitmask,is_indices[i])) { 2931 aux_array2[j++] = i; 2932 } 2933 } 2934 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2935 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2936 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2937 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2938 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2939 2940 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2941 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2942 for (i=0, j=0; i<n_R; i++) { 2943 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2944 aux_array1[j++] = i; 2945 } 2946 } 2947 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2948 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2949 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2950 } 2951 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2952 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2953 } else { 2954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2955 IS tis; 2956 PetscInt schur_size; 2957 2958 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2959 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2960 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2961 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2962 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2963 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2964 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2965 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2966 } 2967 } 2968 PetscFunctionReturn(0); 2969 } 2970 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2974 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2975 { 2976 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2977 PC_IS *pcis = (PC_IS*)pc->data; 2978 PC pc_temp; 2979 Mat A_RR; 2980 MatReuse reuse; 2981 PetscScalar m_one = -1.0; 2982 PetscReal value; 2983 PetscInt n_D,n_R; 2984 PetscBool check_corr[2],issbaij; 2985 PetscErrorCode ierr; 2986 /* prefixes stuff */ 2987 char dir_prefix[256],neu_prefix[256],str_level[16]; 2988 size_t len; 2989 2990 PetscFunctionBegin; 2991 2992 /* compute prefixes */ 2993 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2994 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2995 if (!pcbddc->current_level) { 2996 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2997 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2998 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2999 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3000 } else { 3001 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3002 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3003 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3004 len -= 15; /* remove "pc_bddc_coarse_" */ 3005 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3006 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3007 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3008 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3009 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3010 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3011 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3012 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3013 } 3014 3015 /* DIRICHLET PROBLEM */ 3016 if (dirichlet) { 3017 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3018 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3019 if (!sub_schurs->reuse_solver) { 3020 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3021 } 3022 if (pcbddc->dbg_flag) { 3023 Mat A_IIn; 3024 3025 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3026 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3027 pcis->A_II = A_IIn; 3028 } 3029 } 3030 if (pcbddc->local_mat->symmetric_set) { 3031 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3032 } 3033 /* Matrix for Dirichlet problem is pcis->A_II */ 3034 n_D = pcis->n - pcis->n_B; 3035 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3036 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3037 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3038 /* default */ 3039 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3040 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3041 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3042 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3043 if (issbaij) { 3044 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3045 } else { 3046 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3047 } 3048 /* Allow user's customization */ 3049 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3050 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3051 } 3052 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3053 if (sub_schurs->reuse_solver) { 3054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3055 3056 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3057 } 3058 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3059 if (!n_D) { 3060 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3061 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3062 } 3063 /* Set Up KSP for Dirichlet problem of BDDC */ 3064 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3065 /* set ksp_D into pcis data */ 3066 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3067 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3068 pcis->ksp_D = pcbddc->ksp_D; 3069 } 3070 3071 /* NEUMANN PROBLEM */ 3072 A_RR = 0; 3073 if (neumann) { 3074 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3075 PetscInt ibs,mbs; 3076 PetscBool issbaij; 3077 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3078 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3079 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3080 if (pcbddc->ksp_R) { /* already created ksp */ 3081 PetscInt nn_R; 3082 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3083 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3084 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3085 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3086 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3088 reuse = MAT_INITIAL_MATRIX; 3089 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3090 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3091 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3092 reuse = MAT_INITIAL_MATRIX; 3093 } else { /* safe to reuse the matrix */ 3094 reuse = MAT_REUSE_MATRIX; 3095 } 3096 } 3097 /* last check */ 3098 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3100 reuse = MAT_INITIAL_MATRIX; 3101 } 3102 } else { /* first time, so we need to create the matrix */ 3103 reuse = MAT_INITIAL_MATRIX; 3104 } 3105 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3106 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3107 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3108 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3109 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3110 if (matis->A == pcbddc->local_mat) { 3111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3112 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3113 } else { 3114 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3115 } 3116 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3117 if (matis->A == pcbddc->local_mat) { 3118 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3119 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3120 } else { 3121 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3122 } 3123 } 3124 /* extract A_RR */ 3125 if (sub_schurs->reuse_solver) { 3126 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3127 3128 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3129 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3130 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3131 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3132 } else { 3133 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3134 } 3135 } else { 3136 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3137 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3138 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3139 } 3140 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3141 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3142 } 3143 if (pcbddc->local_mat->symmetric_set) { 3144 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3145 } 3146 if (!pcbddc->ksp_R) { /* create object if not present */ 3147 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3148 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3149 /* default */ 3150 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3151 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3153 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3154 if (issbaij) { 3155 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3156 } else { 3157 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3158 } 3159 /* Allow user's customization */ 3160 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3161 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3162 } 3163 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3164 if (!n_R) { 3165 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3166 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3167 } 3168 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3169 /* Reuse solver if it is present */ 3170 if (sub_schurs->reuse_solver) { 3171 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3172 3173 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3174 } 3175 /* Set Up KSP for Neumann problem of BDDC */ 3176 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3177 } 3178 3179 if (pcbddc->dbg_flag) { 3180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3181 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3183 } 3184 3185 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3186 check_corr[0] = check_corr[1] = PETSC_FALSE; 3187 if (pcbddc->NullSpace_corr[0]) { 3188 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3189 } 3190 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3191 check_corr[0] = PETSC_TRUE; 3192 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3193 } 3194 if (neumann && pcbddc->NullSpace_corr[2]) { 3195 check_corr[1] = PETSC_TRUE; 3196 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3197 } 3198 3199 /* check Dirichlet and Neumann solvers */ 3200 if (pcbddc->dbg_flag) { 3201 if (dirichlet) { /* Dirichlet */ 3202 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3203 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3204 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3205 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3206 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3208 if (check_corr[0]) { 3209 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3210 } 3211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3212 } 3213 if (neumann) { /* Neumann */ 3214 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3215 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3216 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3217 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3218 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3220 if (check_corr[1]) { 3221 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3222 } 3223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3224 } 3225 } 3226 /* free Neumann problem's matrix */ 3227 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 #undef __FUNCT__ 3232 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3233 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3234 { 3235 PetscErrorCode ierr; 3236 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3237 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3238 3239 PetscFunctionBegin; 3240 if (!sub_schurs->reuse_solver) { 3241 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3242 } 3243 if (!pcbddc->switch_static) { 3244 if (applytranspose && pcbddc->local_auxmat1) { 3245 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3246 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3247 } 3248 if (!sub_schurs->reuse_solver) { 3249 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3251 } else { 3252 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3253 3254 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3255 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3256 } 3257 } else { 3258 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3259 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3260 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3261 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3262 if (applytranspose && pcbddc->local_auxmat1) { 3263 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3264 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3265 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3266 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3267 } 3268 } 3269 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3270 if (applytranspose) { 3271 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3272 } else { 3273 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3274 } 3275 } else { 3276 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3277 3278 if (applytranspose) { 3279 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3280 } else { 3281 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3282 } 3283 } 3284 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3285 if (!pcbddc->switch_static) { 3286 if (!sub_schurs->reuse_solver) { 3287 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3288 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3289 } else { 3290 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3291 3292 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3293 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3294 } 3295 if (!applytranspose && pcbddc->local_auxmat1) { 3296 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3297 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3298 } 3299 } else { 3300 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3301 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3302 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3303 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3304 if (!applytranspose && pcbddc->local_auxmat1) { 3305 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3306 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3307 } 3308 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3309 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3310 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3311 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3319 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3320 { 3321 PetscErrorCode ierr; 3322 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3323 PC_IS* pcis = (PC_IS*) (pc->data); 3324 const PetscScalar zero = 0.0; 3325 3326 PetscFunctionBegin; 3327 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3328 if (!pcbddc->benign_apply_coarse_only) { 3329 if (applytranspose) { 3330 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3331 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3332 } else { 3333 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3334 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3335 } 3336 } else { 3337 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3338 } 3339 3340 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3341 if (pcbddc->benign_n) { 3342 PetscScalar *array; 3343 PetscInt j; 3344 3345 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3346 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3347 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3348 } 3349 3350 /* start communications from local primal nodes to rhs of coarse solver */ 3351 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3352 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3353 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3354 3355 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3356 if (pcbddc->coarse_ksp) { 3357 Mat coarse_mat; 3358 Vec rhs,sol; 3359 MatNullSpace nullsp; 3360 PetscBool isbddc = PETSC_FALSE; 3361 3362 if (pcbddc->benign_have_null) { 3363 PC coarse_pc; 3364 3365 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3366 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3367 /* we need to propagate to coarser levels the need for a possible benign correction */ 3368 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3369 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3370 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3371 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3372 } 3373 } 3374 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3375 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3376 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3377 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3378 if (nullsp) { 3379 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3380 } 3381 if (applytranspose) { 3382 if (pcbddc->benign_apply_coarse_only) { 3383 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3384 } else { 3385 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3386 } 3387 } else { 3388 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3389 PC coarse_pc; 3390 3391 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3392 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3393 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3394 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3395 } else { 3396 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3397 } 3398 } 3399 /* we don't need the benign correction at coarser levels anymore */ 3400 if (pcbddc->benign_have_null && isbddc) { 3401 PC coarse_pc; 3402 PC_BDDC* coarsepcbddc; 3403 3404 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3405 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3406 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3407 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3408 } 3409 if (nullsp) { 3410 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3411 } 3412 } 3413 3414 /* Local solution on R nodes */ 3415 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3416 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3417 } 3418 /* communications from coarse sol to local primal nodes */ 3419 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3420 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3421 3422 /* Sum contributions from the two levels */ 3423 if (!pcbddc->benign_apply_coarse_only) { 3424 if (applytranspose) { 3425 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3426 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3427 } else { 3428 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3429 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3430 } 3431 /* store p0 */ 3432 if (pcbddc->benign_n) { 3433 PetscScalar *array; 3434 PetscInt j; 3435 3436 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3437 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3438 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3439 } 3440 } else { /* expand the coarse solution */ 3441 if (applytranspose) { 3442 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3443 } else { 3444 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3445 } 3446 } 3447 PetscFunctionReturn(0); 3448 } 3449 3450 #undef __FUNCT__ 3451 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3452 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3453 { 3454 PetscErrorCode ierr; 3455 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3456 PetscScalar *array; 3457 Vec from,to; 3458 3459 PetscFunctionBegin; 3460 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3461 from = pcbddc->coarse_vec; 3462 to = pcbddc->vec1_P; 3463 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3464 Vec tvec; 3465 3466 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3467 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3468 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3469 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3470 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3471 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3472 } 3473 } else { /* from local to global -> put data in coarse right hand side */ 3474 from = pcbddc->vec1_P; 3475 to = pcbddc->coarse_vec; 3476 } 3477 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 3481 #undef __FUNCT__ 3482 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3483 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3484 { 3485 PetscErrorCode ierr; 3486 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3487 PetscScalar *array; 3488 Vec from,to; 3489 3490 PetscFunctionBegin; 3491 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3492 from = pcbddc->coarse_vec; 3493 to = pcbddc->vec1_P; 3494 } else { /* from local to global -> put data in coarse right hand side */ 3495 from = pcbddc->vec1_P; 3496 to = pcbddc->coarse_vec; 3497 } 3498 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3499 if (smode == SCATTER_FORWARD) { 3500 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3501 Vec tvec; 3502 3503 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3504 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3505 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3506 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3507 } 3508 } else { 3509 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3510 ierr = VecResetArray(from);CHKERRQ(ierr); 3511 } 3512 } 3513 PetscFunctionReturn(0); 3514 } 3515 3516 /* uncomment for testing purposes */ 3517 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3520 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3521 { 3522 PetscErrorCode ierr; 3523 PC_IS* pcis = (PC_IS*)(pc->data); 3524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3525 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3526 /* one and zero */ 3527 PetscScalar one=1.0,zero=0.0; 3528 /* space to store constraints and their local indices */ 3529 PetscScalar *constraints_data; 3530 PetscInt *constraints_idxs,*constraints_idxs_B; 3531 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3532 PetscInt *constraints_n; 3533 /* iterators */ 3534 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3535 /* BLAS integers */ 3536 PetscBLASInt lwork,lierr; 3537 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3538 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3539 /* reuse */ 3540 PetscInt olocal_primal_size,olocal_primal_size_cc; 3541 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3542 /* change of basis */ 3543 PetscBool qr_needed; 3544 PetscBT change_basis,qr_needed_idx; 3545 /* auxiliary stuff */ 3546 PetscInt *nnz,*is_indices; 3547 PetscInt ncc; 3548 /* some quantities */ 3549 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3550 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3551 3552 PetscFunctionBegin; 3553 /* Destroy Mat objects computed previously */ 3554 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3555 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3556 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3557 /* save info on constraints from previous setup (if any) */ 3558 olocal_primal_size = pcbddc->local_primal_size; 3559 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3560 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3561 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3562 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3563 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3564 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3565 3566 if (!pcbddc->adaptive_selection) { 3567 IS ISForVertices,*ISForFaces,*ISForEdges; 3568 MatNullSpace nearnullsp; 3569 const Vec *nearnullvecs; 3570 Vec *localnearnullsp; 3571 PetscScalar *array; 3572 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3573 PetscBool nnsp_has_cnst; 3574 /* LAPACK working arrays for SVD or POD */ 3575 PetscBool skip_lapack,boolforchange; 3576 PetscScalar *work; 3577 PetscReal *singular_vals; 3578 #if defined(PETSC_USE_COMPLEX) 3579 PetscReal *rwork; 3580 #endif 3581 #if defined(PETSC_MISSING_LAPACK_GESVD) 3582 PetscScalar *temp_basis,*correlation_mat; 3583 #else 3584 PetscBLASInt dummy_int=1; 3585 PetscScalar dummy_scalar=1.; 3586 #endif 3587 3588 /* Get index sets for faces, edges and vertices from graph */ 3589 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3590 /* print some info */ 3591 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3592 PetscInt nv; 3593 3594 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3595 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3596 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3602 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3603 } 3604 3605 /* free unneeded index sets */ 3606 if (!pcbddc->use_vertices) { 3607 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3608 } 3609 if (!pcbddc->use_edges) { 3610 for (i=0;i<n_ISForEdges;i++) { 3611 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3612 } 3613 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3614 n_ISForEdges = 0; 3615 } 3616 if (!pcbddc->use_faces) { 3617 for (i=0;i<n_ISForFaces;i++) { 3618 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3619 } 3620 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3621 n_ISForFaces = 0; 3622 } 3623 3624 /* check if near null space is attached to global mat */ 3625 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3626 if (nearnullsp) { 3627 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3628 /* remove any stored info */ 3629 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3630 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3631 /* store information for BDDC solver reuse */ 3632 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3633 pcbddc->onearnullspace = nearnullsp; 3634 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3635 for (i=0;i<nnsp_size;i++) { 3636 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3637 } 3638 } else { /* if near null space is not provided BDDC uses constants by default */ 3639 nnsp_size = 0; 3640 nnsp_has_cnst = PETSC_TRUE; 3641 } 3642 /* get max number of constraints on a single cc */ 3643 max_constraints = nnsp_size; 3644 if (nnsp_has_cnst) max_constraints++; 3645 3646 /* 3647 Evaluate maximum storage size needed by the procedure 3648 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3649 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3650 There can be multiple constraints per connected component 3651 */ 3652 n_vertices = 0; 3653 if (ISForVertices) { 3654 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3655 } 3656 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3657 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3658 3659 total_counts = n_ISForFaces+n_ISForEdges; 3660 total_counts *= max_constraints; 3661 total_counts += n_vertices; 3662 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3663 3664 total_counts = 0; 3665 max_size_of_constraint = 0; 3666 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3667 IS used_is; 3668 if (i<n_ISForEdges) { 3669 used_is = ISForEdges[i]; 3670 } else { 3671 used_is = ISForFaces[i-n_ISForEdges]; 3672 } 3673 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3674 total_counts += j; 3675 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3676 } 3677 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3678 3679 /* get local part of global near null space vectors */ 3680 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3681 for (k=0;k<nnsp_size;k++) { 3682 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3683 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3684 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3685 } 3686 3687 /* whether or not to skip lapack calls */ 3688 skip_lapack = PETSC_TRUE; 3689 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3690 3691 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3692 if (!skip_lapack) { 3693 PetscScalar temp_work; 3694 3695 #if defined(PETSC_MISSING_LAPACK_GESVD) 3696 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3697 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3698 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3699 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3700 #if defined(PETSC_USE_COMPLEX) 3701 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3702 #endif 3703 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3704 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3705 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3706 lwork = -1; 3707 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3708 #if !defined(PETSC_USE_COMPLEX) 3709 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3710 #else 3711 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3712 #endif 3713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3715 #else /* on missing GESVD */ 3716 /* SVD */ 3717 PetscInt max_n,min_n; 3718 max_n = max_size_of_constraint; 3719 min_n = max_constraints; 3720 if (max_size_of_constraint < max_constraints) { 3721 min_n = max_size_of_constraint; 3722 max_n = max_constraints; 3723 } 3724 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3725 #if defined(PETSC_USE_COMPLEX) 3726 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3727 #endif 3728 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3729 lwork = -1; 3730 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3731 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3732 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3734 #if !defined(PETSC_USE_COMPLEX) 3735 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3736 #else 3737 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3738 #endif 3739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3740 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3741 #endif /* on missing GESVD */ 3742 /* Allocate optimal workspace */ 3743 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3744 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3745 } 3746 /* Now we can loop on constraining sets */ 3747 total_counts = 0; 3748 constraints_idxs_ptr[0] = 0; 3749 constraints_data_ptr[0] = 0; 3750 /* vertices */ 3751 if (n_vertices) { 3752 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3753 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3754 for (i=0;i<n_vertices;i++) { 3755 constraints_n[total_counts] = 1; 3756 constraints_data[total_counts] = 1.0; 3757 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3758 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3759 total_counts++; 3760 } 3761 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3762 n_vertices = total_counts; 3763 } 3764 3765 /* edges and faces */ 3766 total_counts_cc = total_counts; 3767 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3768 IS used_is; 3769 PetscBool idxs_copied = PETSC_FALSE; 3770 3771 if (ncc<n_ISForEdges) { 3772 used_is = ISForEdges[ncc]; 3773 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3774 } else { 3775 used_is = ISForFaces[ncc-n_ISForEdges]; 3776 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3777 } 3778 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3779 3780 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3781 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3782 /* change of basis should not be performed on local periodic nodes */ 3783 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3784 if (nnsp_has_cnst) { 3785 PetscScalar quad_value; 3786 3787 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3788 idxs_copied = PETSC_TRUE; 3789 3790 if (!pcbddc->use_nnsp_true) { 3791 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3792 } else { 3793 quad_value = 1.0; 3794 } 3795 for (j=0;j<size_of_constraint;j++) { 3796 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3797 } 3798 temp_constraints++; 3799 total_counts++; 3800 } 3801 for (k=0;k<nnsp_size;k++) { 3802 PetscReal real_value; 3803 PetscScalar *ptr_to_data; 3804 3805 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3806 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3807 for (j=0;j<size_of_constraint;j++) { 3808 ptr_to_data[j] = array[is_indices[j]]; 3809 } 3810 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3811 /* check if array is null on the connected component */ 3812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3813 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3814 if (real_value > 0.0) { /* keep indices and values */ 3815 temp_constraints++; 3816 total_counts++; 3817 if (!idxs_copied) { 3818 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3819 idxs_copied = PETSC_TRUE; 3820 } 3821 } 3822 } 3823 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3824 valid_constraints = temp_constraints; 3825 if (!pcbddc->use_nnsp_true && temp_constraints) { 3826 if (temp_constraints == 1) { /* just normalize the constraint */ 3827 PetscScalar norm,*ptr_to_data; 3828 3829 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3831 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3832 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3833 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3834 } else { /* perform SVD */ 3835 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3836 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3837 3838 #if defined(PETSC_MISSING_LAPACK_GESVD) 3839 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3840 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3841 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3842 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3843 from that computed using LAPACKgesvd 3844 -> This is due to a different computation of eigenvectors in LAPACKheev 3845 -> The quality of the POD-computed basis will be the same */ 3846 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3847 /* Store upper triangular part of correlation matrix */ 3848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3849 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3850 for (j=0;j<temp_constraints;j++) { 3851 for (k=0;k<j+1;k++) { 3852 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3853 } 3854 } 3855 /* compute eigenvalues and eigenvectors of correlation matrix */ 3856 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3857 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3858 #if !defined(PETSC_USE_COMPLEX) 3859 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3860 #else 3861 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3862 #endif 3863 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3864 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3865 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3866 j = 0; 3867 while (j < temp_constraints && singular_vals[j] < tol) j++; 3868 total_counts = total_counts-j; 3869 valid_constraints = temp_constraints-j; 3870 /* scale and copy POD basis into used quadrature memory */ 3871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3872 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3873 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3874 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3875 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3877 if (j<temp_constraints) { 3878 PetscInt ii; 3879 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3880 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3881 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3882 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3883 for (k=0;k<temp_constraints-j;k++) { 3884 for (ii=0;ii<size_of_constraint;ii++) { 3885 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3886 } 3887 } 3888 } 3889 #else /* on missing GESVD */ 3890 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3891 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3894 #if !defined(PETSC_USE_COMPLEX) 3895 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3896 #else 3897 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3898 #endif 3899 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3900 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3901 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3902 k = temp_constraints; 3903 if (k > size_of_constraint) k = size_of_constraint; 3904 j = 0; 3905 while (j < k && singular_vals[k-j-1] < tol) j++; 3906 valid_constraints = k-j; 3907 total_counts = total_counts-temp_constraints+valid_constraints; 3908 #endif /* on missing GESVD */ 3909 } 3910 } 3911 /* update pointers information */ 3912 if (valid_constraints) { 3913 constraints_n[total_counts_cc] = valid_constraints; 3914 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3915 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3916 /* set change_of_basis flag */ 3917 if (boolforchange) { 3918 PetscBTSet(change_basis,total_counts_cc); 3919 } 3920 total_counts_cc++; 3921 } 3922 } 3923 /* free workspace */ 3924 if (!skip_lapack) { 3925 ierr = PetscFree(work);CHKERRQ(ierr); 3926 #if defined(PETSC_USE_COMPLEX) 3927 ierr = PetscFree(rwork);CHKERRQ(ierr); 3928 #endif 3929 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3930 #if defined(PETSC_MISSING_LAPACK_GESVD) 3931 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3932 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3933 #endif 3934 } 3935 for (k=0;k<nnsp_size;k++) { 3936 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3937 } 3938 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3939 /* free index sets of faces, edges and vertices */ 3940 for (i=0;i<n_ISForFaces;i++) { 3941 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3942 } 3943 if (n_ISForFaces) { 3944 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3945 } 3946 for (i=0;i<n_ISForEdges;i++) { 3947 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3948 } 3949 if (n_ISForEdges) { 3950 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3951 } 3952 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3953 } else { 3954 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3955 3956 total_counts = 0; 3957 n_vertices = 0; 3958 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3959 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3960 } 3961 max_constraints = 0; 3962 total_counts_cc = 0; 3963 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3964 total_counts += pcbddc->adaptive_constraints_n[i]; 3965 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3966 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3967 } 3968 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3969 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3970 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3971 constraints_data = pcbddc->adaptive_constraints_data; 3972 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3973 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3974 total_counts_cc = 0; 3975 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3976 if (pcbddc->adaptive_constraints_n[i]) { 3977 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3978 } 3979 } 3980 #if 0 3981 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3982 for (i=0;i<total_counts_cc;i++) { 3983 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3984 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3985 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3986 printf(" %d",constraints_idxs[j]); 3987 } 3988 printf("\n"); 3989 printf("number of cc: %d\n",constraints_n[i]); 3990 } 3991 for (i=0;i<n_vertices;i++) { 3992 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3993 } 3994 for (i=0;i<sub_schurs->n_subs;i++) { 3995 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3996 } 3997 #endif 3998 3999 max_size_of_constraint = 0; 4000 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4001 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4002 /* Change of basis */ 4003 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4004 if (pcbddc->use_change_of_basis) { 4005 for (i=0;i<sub_schurs->n_subs;i++) { 4006 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4007 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4008 } 4009 } 4010 } 4011 } 4012 pcbddc->local_primal_size = total_counts; 4013 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4014 4015 /* map constraints_idxs in boundary numbering */ 4016 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4017 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4018 4019 /* Create constraint matrix */ 4020 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4021 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4022 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4023 4024 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4025 /* determine if a QR strategy is needed for change of basis */ 4026 qr_needed = PETSC_FALSE; 4027 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4028 total_primal_vertices=0; 4029 pcbddc->local_primal_size_cc = 0; 4030 for (i=0;i<total_counts_cc;i++) { 4031 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4032 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4033 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4034 pcbddc->local_primal_size_cc += 1; 4035 } else if (PetscBTLookup(change_basis,i)) { 4036 for (k=0;k<constraints_n[i];k++) { 4037 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4038 } 4039 pcbddc->local_primal_size_cc += constraints_n[i]; 4040 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4041 PetscBTSet(qr_needed_idx,i); 4042 qr_needed = PETSC_TRUE; 4043 } 4044 } else { 4045 pcbddc->local_primal_size_cc += 1; 4046 } 4047 } 4048 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4049 pcbddc->n_vertices = total_primal_vertices; 4050 /* permute indices in order to have a sorted set of vertices */ 4051 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4052 4053 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4054 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4055 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4056 4057 /* nonzero structure of constraint matrix */ 4058 /* and get reference dof for local constraints */ 4059 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4060 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4061 4062 j = total_primal_vertices; 4063 total_counts = total_primal_vertices; 4064 cum = total_primal_vertices; 4065 for (i=n_vertices;i<total_counts_cc;i++) { 4066 if (!PetscBTLookup(change_basis,i)) { 4067 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4068 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4069 cum++; 4070 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4071 for (k=0;k<constraints_n[i];k++) { 4072 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4073 nnz[j+k] = size_of_constraint; 4074 } 4075 j += constraints_n[i]; 4076 } 4077 } 4078 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4079 ierr = PetscFree(nnz);CHKERRQ(ierr); 4080 4081 /* set values in constraint matrix */ 4082 for (i=0;i<total_primal_vertices;i++) { 4083 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4084 } 4085 total_counts = total_primal_vertices; 4086 for (i=n_vertices;i<total_counts_cc;i++) { 4087 if (!PetscBTLookup(change_basis,i)) { 4088 PetscInt *cols; 4089 4090 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4091 cols = constraints_idxs+constraints_idxs_ptr[i]; 4092 for (k=0;k<constraints_n[i];k++) { 4093 PetscInt row = total_counts+k; 4094 PetscScalar *vals; 4095 4096 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4097 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4098 } 4099 total_counts += constraints_n[i]; 4100 } 4101 } 4102 /* assembling */ 4103 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4104 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4105 4106 /* 4107 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4108 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4109 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4110 */ 4111 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4112 if (pcbddc->use_change_of_basis) { 4113 /* dual and primal dofs on a single cc */ 4114 PetscInt dual_dofs,primal_dofs; 4115 /* working stuff for GEQRF */ 4116 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4117 PetscBLASInt lqr_work; 4118 /* working stuff for UNGQR */ 4119 PetscScalar *gqr_work,lgqr_work_t; 4120 PetscBLASInt lgqr_work; 4121 /* working stuff for TRTRS */ 4122 PetscScalar *trs_rhs; 4123 PetscBLASInt Blas_NRHS; 4124 /* pointers for values insertion into change of basis matrix */ 4125 PetscInt *start_rows,*start_cols; 4126 PetscScalar *start_vals; 4127 /* working stuff for values insertion */ 4128 PetscBT is_primal; 4129 PetscInt *aux_primal_numbering_B; 4130 /* matrix sizes */ 4131 PetscInt global_size,local_size; 4132 /* temporary change of basis */ 4133 Mat localChangeOfBasisMatrix; 4134 /* extra space for debugging */ 4135 PetscScalar *dbg_work; 4136 4137 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4138 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4139 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4140 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4141 /* nonzeros for local mat */ 4142 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4143 if (!pcbddc->benign_change || pcbddc->fake_change) { 4144 for (i=0;i<pcis->n;i++) nnz[i]=1; 4145 } else { 4146 const PetscInt *ii; 4147 PetscInt n; 4148 PetscBool flg_row; 4149 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4150 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4151 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4152 } 4153 for (i=n_vertices;i<total_counts_cc;i++) { 4154 if (PetscBTLookup(change_basis,i)) { 4155 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4156 if (PetscBTLookup(qr_needed_idx,i)) { 4157 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4158 } else { 4159 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4160 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4161 } 4162 } 4163 } 4164 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4165 ierr = PetscFree(nnz);CHKERRQ(ierr); 4166 /* Set interior change in the matrix */ 4167 if (!pcbddc->benign_change || pcbddc->fake_change) { 4168 for (i=0;i<pcis->n;i++) { 4169 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4170 } 4171 } else { 4172 const PetscInt *ii,*jj; 4173 PetscScalar *aa; 4174 PetscInt n; 4175 PetscBool flg_row; 4176 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4177 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4178 for (i=0;i<n;i++) { 4179 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4180 } 4181 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4182 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4183 } 4184 4185 if (pcbddc->dbg_flag) { 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4188 } 4189 4190 4191 /* Now we loop on the constraints which need a change of basis */ 4192 /* 4193 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4194 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4195 4196 Basic blocks of change of basis matrix T computed by 4197 4198 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4199 4200 | 1 0 ... 0 s_1/S | 4201 | 0 1 ... 0 s_2/S | 4202 | ... | 4203 | 0 ... 1 s_{n-1}/S | 4204 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4205 4206 with S = \sum_{i=1}^n s_i^2 4207 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4208 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4209 4210 - QR decomposition of constraints otherwise 4211 */ 4212 if (qr_needed) { 4213 /* space to store Q */ 4214 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4215 /* first we issue queries for optimal work */ 4216 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4217 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4218 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4219 lqr_work = -1; 4220 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4221 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4222 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4223 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4224 lgqr_work = -1; 4225 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4226 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4227 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4228 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4229 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4230 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4231 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4232 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4233 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4234 /* array to store scaling factors for reflectors */ 4235 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4236 /* array to store rhs and solution of triangular solver */ 4237 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4238 /* allocating workspace for check */ 4239 if (pcbddc->dbg_flag) { 4240 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4241 } 4242 } 4243 /* array to store whether a node is primal or not */ 4244 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4245 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4246 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4247 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4248 for (i=0;i<total_primal_vertices;i++) { 4249 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4250 } 4251 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4252 4253 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4254 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4255 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4256 if (PetscBTLookup(change_basis,total_counts)) { 4257 /* get constraint info */ 4258 primal_dofs = constraints_n[total_counts]; 4259 dual_dofs = size_of_constraint-primal_dofs; 4260 4261 if (pcbddc->dbg_flag) { 4262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4263 } 4264 4265 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4266 4267 /* copy quadrature constraints for change of basis check */ 4268 if (pcbddc->dbg_flag) { 4269 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4270 } 4271 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4272 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4273 4274 /* compute QR decomposition of constraints */ 4275 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4276 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4279 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4280 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4282 4283 /* explictly compute R^-T */ 4284 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4285 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4286 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4289 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4290 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4291 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4292 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4293 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4294 4295 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4296 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4300 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4301 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4302 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4303 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4304 4305 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4306 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4307 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4308 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4309 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4310 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4312 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4314 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4315 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4317 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4318 4319 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4320 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4321 /* insert cols for primal dofs */ 4322 for (j=0;j<primal_dofs;j++) { 4323 start_vals = &qr_basis[j*size_of_constraint]; 4324 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4325 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4326 } 4327 /* insert cols for dual dofs */ 4328 for (j=0,k=0;j<dual_dofs;k++) { 4329 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4330 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4331 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4332 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4333 j++; 4334 } 4335 } 4336 4337 /* check change of basis */ 4338 if (pcbddc->dbg_flag) { 4339 PetscInt ii,jj; 4340 PetscBool valid_qr=PETSC_TRUE; 4341 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4342 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4344 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4348 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4349 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4350 for (jj=0;jj<size_of_constraint;jj++) { 4351 for (ii=0;ii<primal_dofs;ii++) { 4352 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4353 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4354 } 4355 } 4356 if (!valid_qr) { 4357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4358 for (jj=0;jj<size_of_constraint;jj++) { 4359 for (ii=0;ii<primal_dofs;ii++) { 4360 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4361 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4362 } 4363 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4364 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4365 } 4366 } 4367 } 4368 } else { 4369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4370 } 4371 } 4372 } else { /* simple transformation block */ 4373 PetscInt row,col; 4374 PetscScalar val,norm; 4375 4376 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4377 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4378 for (j=0;j<size_of_constraint;j++) { 4379 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4380 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4381 if (!PetscBTLookup(is_primal,row_B)) { 4382 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4383 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4384 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4385 } else { 4386 for (k=0;k<size_of_constraint;k++) { 4387 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4388 if (row != col) { 4389 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4390 } else { 4391 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4392 } 4393 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4394 } 4395 } 4396 } 4397 if (pcbddc->dbg_flag) { 4398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4399 } 4400 } 4401 } else { 4402 if (pcbddc->dbg_flag) { 4403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4404 } 4405 } 4406 } 4407 4408 /* free workspace */ 4409 if (qr_needed) { 4410 if (pcbddc->dbg_flag) { 4411 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4412 } 4413 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4414 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4415 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4416 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4417 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4418 } 4419 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4420 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4421 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4422 4423 /* assembling of global change of variable */ 4424 if (!pcbddc->fake_change) { 4425 Mat tmat; 4426 PetscInt bs; 4427 4428 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4429 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4430 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4431 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4432 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4433 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4434 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4435 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4436 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4437 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4438 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4439 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4440 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4441 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4442 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4443 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4444 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4445 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4446 4447 /* check */ 4448 if (pcbddc->dbg_flag) { 4449 PetscReal error; 4450 Vec x,x_change; 4451 4452 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4453 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4454 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4455 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4456 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4457 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4458 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4459 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4461 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4462 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4463 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4464 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4465 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4466 ierr = VecDestroy(&x);CHKERRQ(ierr); 4467 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4468 } 4469 /* adapt sub_schurs computed (if any) */ 4470 if (pcbddc->use_deluxe_scaling) { 4471 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4472 4473 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4474 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4475 } 4476 if (sub_schurs->S_Ej_all) { 4477 Mat S_new,tmat; 4478 IS is_all_N,is_V_Sall = NULL; 4479 4480 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4481 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4482 if (pcbddc->deluxe_zerorows) { 4483 ISLocalToGlobalMapping NtoSall; 4484 IS is_V; 4485 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4486 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4487 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4488 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4489 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4490 } 4491 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4492 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4493 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4494 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4495 if (pcbddc->deluxe_zerorows) { 4496 const PetscScalar *array; 4497 const PetscInt *idxs_V,*idxs_all; 4498 PetscInt i,n_V; 4499 4500 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4501 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4502 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4503 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4504 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4505 for (i=0;i<n_V;i++) { 4506 PetscScalar val; 4507 PetscInt idx; 4508 4509 idx = idxs_V[i]; 4510 val = array[idxs_all[idxs_V[i]]]; 4511 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4512 } 4513 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4514 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4515 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4516 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4517 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4518 } 4519 sub_schurs->S_Ej_all = S_new; 4520 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4521 if (sub_schurs->sum_S_Ej_all) { 4522 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4523 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4524 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4525 if (pcbddc->deluxe_zerorows) { 4526 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4527 } 4528 sub_schurs->sum_S_Ej_all = S_new; 4529 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4530 } 4531 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4532 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4533 } 4534 /* destroy any change of basis context in sub_schurs */ 4535 if (sub_schurs->change) { 4536 PetscInt i; 4537 4538 for (i=0;i<sub_schurs->n_subs;i++) { 4539 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4540 } 4541 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4542 } 4543 } 4544 if (pcbddc->switch_static) { /* need to save the local change */ 4545 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4546 } else { 4547 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4548 } 4549 /* determine if any process has changed the pressures locally */ 4550 pcbddc->change_interior = pcbddc->benign_have_null; 4551 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4552 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4553 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4554 pcbddc->use_qr_single = qr_needed; 4555 } 4556 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4557 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4558 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4559 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4560 } else { 4561 Mat benign_global = NULL; 4562 if (pcbddc->benign_have_null) { 4563 Mat tmat; 4564 4565 pcbddc->change_interior = PETSC_TRUE; 4566 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4567 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4568 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4569 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4570 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4571 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4572 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4573 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4574 if (pcbddc->benign_change) { 4575 Mat M; 4576 4577 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4578 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4579 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4580 ierr = MatDestroy(&M);CHKERRQ(ierr); 4581 } else { 4582 Mat eye; 4583 PetscScalar *array; 4584 4585 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4586 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4587 for (i=0;i<pcis->n;i++) { 4588 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4589 } 4590 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4591 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4594 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4595 } 4596 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4597 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4598 } 4599 if (pcbddc->user_ChangeOfBasisMatrix) { 4600 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4601 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4602 } else if (pcbddc->benign_have_null) { 4603 pcbddc->ChangeOfBasisMatrix = benign_global; 4604 } 4605 } 4606 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4607 IS is_global; 4608 const PetscInt *gidxs; 4609 4610 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4611 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4612 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4613 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4614 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4615 } 4616 } 4617 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4618 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4619 } 4620 4621 if (!pcbddc->fake_change) { 4622 /* add pressure dofs to set of primal nodes for numbering purposes */ 4623 for (i=0;i<pcbddc->benign_n;i++) { 4624 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4625 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4626 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4627 pcbddc->local_primal_size_cc++; 4628 pcbddc->local_primal_size++; 4629 } 4630 4631 /* check if a new primal space has been introduced (also take into account benign trick) */ 4632 pcbddc->new_primal_space_local = PETSC_TRUE; 4633 if (olocal_primal_size == pcbddc->local_primal_size) { 4634 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4635 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4636 if (!pcbddc->new_primal_space_local) { 4637 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4638 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4639 } 4640 } 4641 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4642 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4643 } 4644 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4645 4646 /* flush dbg viewer */ 4647 if (pcbddc->dbg_flag) { 4648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4649 } 4650 4651 /* free workspace */ 4652 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4653 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4654 if (!pcbddc->adaptive_selection) { 4655 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4656 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4657 } else { 4658 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4659 pcbddc->adaptive_constraints_idxs_ptr, 4660 pcbddc->adaptive_constraints_data_ptr, 4661 pcbddc->adaptive_constraints_idxs, 4662 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4663 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4664 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4665 } 4666 PetscFunctionReturn(0); 4667 } 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4671 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4672 { 4673 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4674 PC_IS *pcis = (PC_IS*)pc->data; 4675 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4676 PetscInt ierr,i,N; 4677 4678 PetscFunctionBegin; 4679 /* Reset previously computed graph */ 4680 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4681 /* Init local Graph struct */ 4682 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4683 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4684 4685 /* Check validity of the csr graph passed in by the user */ 4686 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4687 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4688 } 4689 4690 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4691 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4692 PetscInt *xadj,*adjncy; 4693 PetscInt nvtxs; 4694 PetscBool flg_row=PETSC_FALSE; 4695 4696 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4697 if (flg_row) { 4698 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4699 pcbddc->computed_rowadj = PETSC_TRUE; 4700 } 4701 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4702 } 4703 if (pcbddc->dbg_flag) { 4704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4705 } 4706 4707 /* Setup of Graph */ 4708 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4709 4710 /* attach info on disconnected subdomains if present */ 4711 if (pcbddc->n_local_subs) { 4712 PetscInt *local_subs; 4713 4714 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4715 for (i=0;i<pcbddc->n_local_subs;i++) { 4716 const PetscInt *idxs; 4717 PetscInt nl,j; 4718 4719 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4720 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4721 for (j=0;j<nl;j++) { 4722 local_subs[idxs[j]] = i; 4723 } 4724 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4725 } 4726 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4727 pcbddc->mat_graph->local_subs = local_subs; 4728 } 4729 4730 /* Graph's connected components analysis */ 4731 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4732 PetscFunctionReturn(0); 4733 } 4734 4735 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4736 #undef __FUNCT__ 4737 #define __FUNCT__ "PCBDDCSubsetNumbering" 4738 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4739 { 4740 PetscSF sf; 4741 PetscLayout map; 4742 const PetscInt *idxs; 4743 PetscInt *leaf_data,*root_data,*gidxs; 4744 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4745 PetscInt n_n,nlocals,start,first_index; 4746 PetscMPIInt commsize; 4747 PetscBool first_found; 4748 PetscErrorCode ierr; 4749 4750 PetscFunctionBegin; 4751 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4752 if (subset_mult) { 4753 PetscCheckSameComm(subset,1,subset_mult,2); 4754 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4755 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4756 } 4757 /* create workspace layout for computing global indices of subset */ 4758 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4759 lbounds[0] = lbounds[1] = 0; 4760 for (i=0;i<n;i++) { 4761 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4762 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4763 } 4764 lbounds[0] = -lbounds[0]; 4765 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4766 gbounds[0] = -gbounds[0]; 4767 N = gbounds[1] - gbounds[0] + 1; 4768 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4769 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4770 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4771 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4772 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4773 4774 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4775 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4776 if (subset_mult) { 4777 const PetscInt* idxs_mult; 4778 4779 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4780 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4781 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4782 } else { 4783 for (i=0;i<n;i++) leaf_data[i] = 1; 4784 } 4785 /* local size of new subset */ 4786 n_n = 0; 4787 for (i=0;i<n;i++) n_n += leaf_data[i]; 4788 4789 /* global indexes in layout */ 4790 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4791 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4792 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4793 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4794 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4795 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4796 4797 /* reduce from leaves to roots */ 4798 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4799 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4800 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4801 4802 /* count indexes in local part of layout */ 4803 nlocals = 0; 4804 first_index = -1; 4805 first_found = PETSC_FALSE; 4806 for (i=0;i<Nl;i++) { 4807 if (!first_found && root_data[i]) { 4808 first_found = PETSC_TRUE; 4809 first_index = i; 4810 } 4811 nlocals += root_data[i]; 4812 } 4813 4814 /* cumulative of number of indexes and size of subset without holes */ 4815 #if defined(PETSC_HAVE_MPI_EXSCAN) 4816 start = 0; 4817 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4818 #else 4819 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4820 start = start-nlocals; 4821 #endif 4822 4823 if (N_n) { /* compute total size of new subset if requested */ 4824 *N_n = start + nlocals; 4825 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4826 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4827 } 4828 4829 /* adapt root data with cumulative */ 4830 if (first_found) { 4831 PetscInt old_index; 4832 4833 root_data[first_index] += start; 4834 old_index = first_index; 4835 for (i=first_index+1;i<Nl;i++) { 4836 if (root_data[i]) { 4837 root_data[i] += root_data[old_index]; 4838 old_index = i; 4839 } 4840 } 4841 } 4842 4843 /* from roots to leaves */ 4844 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4845 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4846 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4847 4848 /* create new IS with global indexes without holes */ 4849 if (subset_mult) { 4850 const PetscInt* idxs_mult; 4851 PetscInt cum; 4852 4853 cum = 0; 4854 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4855 for (i=0;i<n;i++) { 4856 PetscInt j; 4857 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4858 } 4859 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4860 } else { 4861 for (i=0;i<n;i++) { 4862 gidxs[i] = leaf_data[i]-1; 4863 } 4864 } 4865 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4866 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4867 PetscFunctionReturn(0); 4868 } 4869 4870 #undef __FUNCT__ 4871 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4872 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4873 { 4874 PetscInt i,j; 4875 PetscScalar *alphas; 4876 PetscErrorCode ierr; 4877 4878 PetscFunctionBegin; 4879 /* this implements stabilized Gram-Schmidt */ 4880 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4881 for (i=0;i<n;i++) { 4882 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4883 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4884 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4885 } 4886 ierr = PetscFree(alphas);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatISGetSubassemblingPattern" 4892 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4893 { 4894 Mat A; 4895 PetscInt n_neighs,*neighs,*n_shared,**shared; 4896 PetscMPIInt size,rank,color; 4897 PetscInt *xadj,*adjncy; 4898 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4899 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4900 PetscInt void_procs,*procs_candidates = NULL; 4901 PetscInt xadj_count, *count; 4902 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4903 PetscSubcomm psubcomm; 4904 MPI_Comm subcomm; 4905 PetscErrorCode ierr; 4906 4907 PetscFunctionBegin; 4908 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4909 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4910 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4911 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4912 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4913 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4914 4915 if (have_void) *have_void = PETSC_FALSE; 4916 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4917 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4918 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4919 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4920 im_active = !!(n); 4921 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4922 void_procs = size - active_procs; 4923 /* get ranks of of non-active processes in mat communicator */ 4924 if (void_procs) { 4925 PetscInt ncand; 4926 4927 if (have_void) *have_void = PETSC_TRUE; 4928 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4929 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4930 for (i=0,ncand=0;i<size;i++) { 4931 if (!procs_candidates[i]) { 4932 procs_candidates[ncand++] = i; 4933 } 4934 } 4935 /* force n_subdomains to be not greater that the number of non-active processes */ 4936 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4937 } 4938 4939 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4940 if (active_procs < *n_subdomains) { 4941 PetscInt issize,isidx; 4942 if (im_active) { 4943 issize = 1; 4944 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4945 isidx = procs_candidates[rank]; 4946 } else { 4947 isidx = rank; 4948 } 4949 } else { 4950 issize = 0; 4951 isidx = -1; 4952 } 4953 *n_subdomains = active_procs; 4954 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4955 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4959 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4960 threshold = PetscMax(threshold,2); 4961 4962 /* Get info on mapping */ 4963 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4964 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4965 4966 /* build local CSR graph of subdomains' connectivity */ 4967 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4968 xadj[0] = 0; 4969 xadj[1] = PetscMax(n_neighs-1,0); 4970 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4971 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4972 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4973 for (i=1;i<n_neighs;i++) 4974 for (j=0;j<n_shared[i];j++) 4975 count[shared[i][j]] += 1; 4976 4977 xadj_count = 0; 4978 for (i=1;i<n_neighs;i++) { 4979 for (j=0;j<n_shared[i];j++) { 4980 if (count[shared[i][j]] < threshold) { 4981 adjncy[xadj_count] = neighs[i]; 4982 adjncy_wgt[xadj_count] = n_shared[i]; 4983 xadj_count++; 4984 break; 4985 } 4986 } 4987 } 4988 xadj[1] = xadj_count; 4989 ierr = PetscFree(count);CHKERRQ(ierr); 4990 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4991 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4992 4993 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4994 4995 /* Restrict work on active processes only */ 4996 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4997 if (void_procs) { 4998 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4999 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5000 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5001 subcomm = PetscSubcommChild(psubcomm); 5002 } else { 5003 psubcomm = NULL; 5004 subcomm = PetscObjectComm((PetscObject)mat); 5005 } 5006 5007 v_wgt = NULL; 5008 if (!color) { 5009 ierr = PetscFree(xadj);CHKERRQ(ierr); 5010 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5011 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5012 } else { 5013 Mat subdomain_adj; 5014 IS new_ranks,new_ranks_contig; 5015 MatPartitioning partitioner; 5016 PetscInt rstart=0,rend=0; 5017 PetscInt *is_indices,*oldranks; 5018 PetscMPIInt size; 5019 PetscBool aggregate; 5020 5021 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5022 if (void_procs) { 5023 PetscInt prank = rank; 5024 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5025 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5026 for (i=0;i<xadj[1];i++) { 5027 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5028 } 5029 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5030 } else { 5031 oldranks = NULL; 5032 } 5033 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5034 if (aggregate) { /* TODO: all this part could be made more efficient */ 5035 PetscInt lrows,row,ncols,*cols; 5036 PetscMPIInt nrank; 5037 PetscScalar *vals; 5038 5039 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5040 lrows = 0; 5041 if (nrank<redprocs) { 5042 lrows = size/redprocs; 5043 if (nrank<size%redprocs) lrows++; 5044 } 5045 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5046 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5047 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5048 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5049 row = nrank; 5050 ncols = xadj[1]-xadj[0]; 5051 cols = adjncy; 5052 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5053 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5054 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5055 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5056 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5057 ierr = PetscFree(xadj);CHKERRQ(ierr); 5058 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5059 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5060 ierr = PetscFree(vals);CHKERRQ(ierr); 5061 if (use_vwgt) { 5062 Vec v; 5063 const PetscScalar *array; 5064 PetscInt nl; 5065 5066 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5067 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5068 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5069 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5070 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5071 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5072 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5073 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5074 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5075 ierr = VecDestroy(&v);CHKERRQ(ierr); 5076 } 5077 } else { 5078 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5079 if (use_vwgt) { 5080 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5081 v_wgt[0] = local_size; 5082 } 5083 } 5084 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5085 5086 /* Partition */ 5087 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5088 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5089 if (v_wgt) { 5090 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5091 } 5092 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5093 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5094 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5095 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5096 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5097 5098 /* renumber new_ranks to avoid "holes" in new set of processors */ 5099 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5100 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5101 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5102 if (!aggregate) { 5103 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5104 #if defined(PETSC_USE_DEBUG) 5105 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5106 #endif 5107 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5108 } else if (oldranks) { 5109 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5110 } else { 5111 ranks_send_to_idx[0] = is_indices[0]; 5112 } 5113 } else { 5114 PetscInt idxs[1]; 5115 PetscMPIInt tag; 5116 MPI_Request *reqs; 5117 5118 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5119 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5120 for (i=rstart;i<rend;i++) { 5121 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5122 } 5123 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5124 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5125 ierr = PetscFree(reqs);CHKERRQ(ierr); 5126 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5127 #if defined(PETSC_USE_DEBUG) 5128 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5129 #endif 5130 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5131 } else if (oldranks) { 5132 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5133 } else { 5134 ranks_send_to_idx[0] = idxs[0]; 5135 } 5136 } 5137 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5138 /* clean up */ 5139 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5140 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5141 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5142 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5143 } 5144 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5145 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5146 5147 /* assemble parallel IS for sends */ 5148 i = 1; 5149 if (!color) i=0; 5150 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5151 PetscFunctionReturn(0); 5152 } 5153 5154 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5155 5156 #undef __FUNCT__ 5157 #define __FUNCT__ "MatISSubassemble" 5158 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5159 { 5160 Mat local_mat; 5161 IS is_sends_internal; 5162 PetscInt rows,cols,new_local_rows; 5163 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5164 PetscBool ismatis,isdense,newisdense,destroy_mat; 5165 ISLocalToGlobalMapping l2gmap; 5166 PetscInt* l2gmap_indices; 5167 const PetscInt* is_indices; 5168 MatType new_local_type; 5169 /* buffers */ 5170 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5171 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5172 PetscInt *recv_buffer_idxs_local; 5173 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5174 /* MPI */ 5175 MPI_Comm comm,comm_n; 5176 PetscSubcomm subcomm; 5177 PetscMPIInt n_sends,n_recvs,commsize; 5178 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5179 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5180 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5181 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5182 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5183 PetscErrorCode ierr; 5184 5185 PetscFunctionBegin; 5186 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5187 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5188 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5189 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5190 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5191 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5192 PetscValidLogicalCollectiveBool(mat,reuse,6); 5193 PetscValidLogicalCollectiveInt(mat,nis,8); 5194 5195 /* further checks */ 5196 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5197 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5198 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5199 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5200 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5201 if (reuse && *mat_n) { 5202 PetscInt mrows,mcols,mnrows,mncols; 5203 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5204 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5205 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5206 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5207 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5208 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5209 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5210 } 5211 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5212 PetscValidLogicalCollectiveInt(mat,bs,0); 5213 5214 /* prepare IS for sending if not provided */ 5215 if (!is_sends) { 5216 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5217 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5218 } else { 5219 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5220 is_sends_internal = is_sends; 5221 } 5222 5223 /* get comm */ 5224 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5225 5226 /* compute number of sends */ 5227 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5228 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5229 5230 /* compute number of receives */ 5231 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5232 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5233 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5234 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5235 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5236 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5237 ierr = PetscFree(iflags);CHKERRQ(ierr); 5238 5239 /* restrict comm if requested */ 5240 subcomm = 0; 5241 destroy_mat = PETSC_FALSE; 5242 if (restrict_comm) { 5243 PetscMPIInt color,subcommsize; 5244 5245 color = 0; 5246 if (restrict_full) { 5247 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5248 } else { 5249 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5250 } 5251 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5252 subcommsize = commsize - subcommsize; 5253 /* check if reuse has been requested */ 5254 if (reuse) { 5255 if (*mat_n) { 5256 PetscMPIInt subcommsize2; 5257 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5258 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5259 comm_n = PetscObjectComm((PetscObject)*mat_n); 5260 } else { 5261 comm_n = PETSC_COMM_SELF; 5262 } 5263 } else { /* MAT_INITIAL_MATRIX */ 5264 PetscMPIInt rank; 5265 5266 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5267 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5268 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5269 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5270 comm_n = PetscSubcommChild(subcomm); 5271 } 5272 /* flag to destroy *mat_n if not significative */ 5273 if (color) destroy_mat = PETSC_TRUE; 5274 } else { 5275 comm_n = comm; 5276 } 5277 5278 /* prepare send/receive buffers */ 5279 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5280 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5281 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5282 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5283 if (nis) { 5284 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5285 } 5286 5287 /* Get data from local matrices */ 5288 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5289 /* TODO: See below some guidelines on how to prepare the local buffers */ 5290 /* 5291 send_buffer_vals should contain the raw values of the local matrix 5292 send_buffer_idxs should contain: 5293 - MatType_PRIVATE type 5294 - PetscInt size_of_l2gmap 5295 - PetscInt global_row_indices[size_of_l2gmap] 5296 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5297 */ 5298 else { 5299 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5300 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5301 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5302 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5303 send_buffer_idxs[1] = i; 5304 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5305 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5306 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5307 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5308 for (i=0;i<n_sends;i++) { 5309 ilengths_vals[is_indices[i]] = len*len; 5310 ilengths_idxs[is_indices[i]] = len+2; 5311 } 5312 } 5313 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5314 /* additional is (if any) */ 5315 if (nis) { 5316 PetscMPIInt psum; 5317 PetscInt j; 5318 for (j=0,psum=0;j<nis;j++) { 5319 PetscInt plen; 5320 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5321 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5322 psum += len+1; /* indices + lenght */ 5323 } 5324 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5325 for (j=0,psum=0;j<nis;j++) { 5326 PetscInt plen; 5327 const PetscInt *is_array_idxs; 5328 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5329 send_buffer_idxs_is[psum] = plen; 5330 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5331 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5332 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5333 psum += plen+1; /* indices + lenght */ 5334 } 5335 for (i=0;i<n_sends;i++) { 5336 ilengths_idxs_is[is_indices[i]] = psum; 5337 } 5338 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5339 } 5340 5341 buf_size_idxs = 0; 5342 buf_size_vals = 0; 5343 buf_size_idxs_is = 0; 5344 for (i=0;i<n_recvs;i++) { 5345 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5346 buf_size_vals += (PetscInt)olengths_vals[i]; 5347 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5348 } 5349 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5350 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5351 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5352 5353 /* get new tags for clean communications */ 5354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5356 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5357 5358 /* allocate for requests */ 5359 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5360 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5361 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5362 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5363 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5364 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5365 5366 /* communications */ 5367 ptr_idxs = recv_buffer_idxs; 5368 ptr_vals = recv_buffer_vals; 5369 ptr_idxs_is = recv_buffer_idxs_is; 5370 for (i=0;i<n_recvs;i++) { 5371 source_dest = onodes[i]; 5372 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5373 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5374 ptr_idxs += olengths_idxs[i]; 5375 ptr_vals += olengths_vals[i]; 5376 if (nis) { 5377 source_dest = onodes_is[i]; 5378 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5379 ptr_idxs_is += olengths_idxs_is[i]; 5380 } 5381 } 5382 for (i=0;i<n_sends;i++) { 5383 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5384 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5385 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5386 if (nis) { 5387 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5388 } 5389 } 5390 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5391 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5392 5393 /* assemble new l2g map */ 5394 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5395 ptr_idxs = recv_buffer_idxs; 5396 new_local_rows = 0; 5397 for (i=0;i<n_recvs;i++) { 5398 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5399 ptr_idxs += olengths_idxs[i]; 5400 } 5401 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5402 ptr_idxs = recv_buffer_idxs; 5403 new_local_rows = 0; 5404 for (i=0;i<n_recvs;i++) { 5405 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5406 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5407 ptr_idxs += olengths_idxs[i]; 5408 } 5409 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5410 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5411 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5412 5413 /* infer new local matrix type from received local matrices type */ 5414 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5415 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5416 if (n_recvs) { 5417 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5418 ptr_idxs = recv_buffer_idxs; 5419 for (i=0;i<n_recvs;i++) { 5420 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5421 new_local_type_private = MATAIJ_PRIVATE; 5422 break; 5423 } 5424 ptr_idxs += olengths_idxs[i]; 5425 } 5426 switch (new_local_type_private) { 5427 case MATDENSE_PRIVATE: 5428 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5429 new_local_type = MATSEQAIJ; 5430 bs = 1; 5431 } else { /* if I receive only 1 dense matrix */ 5432 new_local_type = MATSEQDENSE; 5433 bs = 1; 5434 } 5435 break; 5436 case MATAIJ_PRIVATE: 5437 new_local_type = MATSEQAIJ; 5438 bs = 1; 5439 break; 5440 case MATBAIJ_PRIVATE: 5441 new_local_type = MATSEQBAIJ; 5442 break; 5443 case MATSBAIJ_PRIVATE: 5444 new_local_type = MATSEQSBAIJ; 5445 break; 5446 default: 5447 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5448 break; 5449 } 5450 } else { /* by default, new_local_type is seqdense */ 5451 new_local_type = MATSEQDENSE; 5452 bs = 1; 5453 } 5454 5455 /* create MATIS object if needed */ 5456 if (!reuse) { 5457 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5458 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5459 } else { 5460 /* it also destroys the local matrices */ 5461 if (*mat_n) { 5462 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5463 } else { /* this is a fake object */ 5464 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5465 } 5466 } 5467 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5468 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5469 5470 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5471 5472 /* Global to local map of received indices */ 5473 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5474 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5475 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5476 5477 /* restore attributes -> type of incoming data and its size */ 5478 buf_size_idxs = 0; 5479 for (i=0;i<n_recvs;i++) { 5480 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5481 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5482 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5483 } 5484 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5485 5486 /* set preallocation */ 5487 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5488 if (!newisdense) { 5489 PetscInt *new_local_nnz=0; 5490 5491 ptr_idxs = recv_buffer_idxs_local; 5492 if (n_recvs) { 5493 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5494 } 5495 for (i=0;i<n_recvs;i++) { 5496 PetscInt j; 5497 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5498 for (j=0;j<*(ptr_idxs+1);j++) { 5499 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5500 } 5501 } else { 5502 /* TODO */ 5503 } 5504 ptr_idxs += olengths_idxs[i]; 5505 } 5506 if (new_local_nnz) { 5507 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5508 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5509 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5510 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5511 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5512 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5513 } else { 5514 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5515 } 5516 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5517 } else { 5518 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5519 } 5520 5521 /* set values */ 5522 ptr_vals = recv_buffer_vals; 5523 ptr_idxs = recv_buffer_idxs_local; 5524 for (i=0;i<n_recvs;i++) { 5525 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5526 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5527 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5528 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5529 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5530 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5531 } else { 5532 /* TODO */ 5533 } 5534 ptr_idxs += olengths_idxs[i]; 5535 ptr_vals += olengths_vals[i]; 5536 } 5537 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5538 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5539 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5540 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5541 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5542 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5543 5544 #if 0 5545 if (!restrict_comm) { /* check */ 5546 Vec lvec,rvec; 5547 PetscReal infty_error; 5548 5549 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5550 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5551 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5552 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5553 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5554 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5555 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5556 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5557 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5558 } 5559 #endif 5560 5561 /* assemble new additional is (if any) */ 5562 if (nis) { 5563 PetscInt **temp_idxs,*count_is,j,psum; 5564 5565 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5566 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5567 ptr_idxs = recv_buffer_idxs_is; 5568 psum = 0; 5569 for (i=0;i<n_recvs;i++) { 5570 for (j=0;j<nis;j++) { 5571 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5572 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5573 psum += plen; 5574 ptr_idxs += plen+1; /* shift pointer to received data */ 5575 } 5576 } 5577 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5578 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5579 for (i=1;i<nis;i++) { 5580 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5581 } 5582 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5583 ptr_idxs = recv_buffer_idxs_is; 5584 for (i=0;i<n_recvs;i++) { 5585 for (j=0;j<nis;j++) { 5586 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5587 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5588 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5589 ptr_idxs += plen+1; /* shift pointer to received data */ 5590 } 5591 } 5592 for (i=0;i<nis;i++) { 5593 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5594 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5595 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5596 } 5597 ierr = PetscFree(count_is);CHKERRQ(ierr); 5598 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5599 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5600 } 5601 /* free workspace */ 5602 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5603 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5604 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5605 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5606 if (isdense) { 5607 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5608 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5609 } else { 5610 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5611 } 5612 if (nis) { 5613 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5614 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5615 } 5616 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5617 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5618 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5619 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5620 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5621 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5622 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5623 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5624 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5625 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5626 ierr = PetscFree(onodes);CHKERRQ(ierr); 5627 if (nis) { 5628 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5629 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5630 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5631 } 5632 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5633 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5634 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5635 for (i=0;i<nis;i++) { 5636 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5637 } 5638 *mat_n = NULL; 5639 } 5640 PetscFunctionReturn(0); 5641 } 5642 5643 /* temporary hack into ksp private data structure */ 5644 #include <petsc/private/kspimpl.h> 5645 5646 #undef __FUNCT__ 5647 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5648 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5649 { 5650 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5651 PC_IS *pcis = (PC_IS*)pc->data; 5652 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5653 MatNullSpace CoarseNullSpace = NULL; 5654 ISLocalToGlobalMapping coarse_islg; 5655 IS coarse_is,*isarray; 5656 PetscInt i,im_active=-1,active_procs=-1; 5657 PetscInt nis,nisdofs,nisneu,nisvert; 5658 PC pc_temp; 5659 PCType coarse_pc_type; 5660 KSPType coarse_ksp_type; 5661 PetscBool multilevel_requested,multilevel_allowed; 5662 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5663 Mat t_coarse_mat_is; 5664 PetscInt ncoarse; 5665 PetscBool compute_vecs = PETSC_FALSE; 5666 PetscScalar *array; 5667 MatReuse coarse_mat_reuse; 5668 PetscBool restr, full_restr, have_void; 5669 PetscErrorCode ierr; 5670 5671 PetscFunctionBegin; 5672 /* Assign global numbering to coarse dofs */ 5673 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5674 PetscInt ocoarse_size; 5675 compute_vecs = PETSC_TRUE; 5676 ocoarse_size = pcbddc->coarse_size; 5677 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5678 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5679 /* see if we can avoid some work */ 5680 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5681 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5682 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5683 PC pc; 5684 PetscBool isbddc; 5685 5686 /* temporary workaround since PCBDDC does not have a reset method so far */ 5687 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5688 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5689 if (isbddc) { 5690 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5691 } else { 5692 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5693 } 5694 coarse_reuse = PETSC_FALSE; 5695 } else { /* we can safely reuse already computed coarse matrix */ 5696 coarse_reuse = PETSC_TRUE; 5697 } 5698 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5699 coarse_reuse = PETSC_FALSE; 5700 } 5701 /* reset any subassembling information */ 5702 if (!coarse_reuse || pcbddc->recompute_topography) { 5703 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5704 } 5705 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5706 coarse_reuse = PETSC_TRUE; 5707 } 5708 /* assemble coarse matrix */ 5709 if (coarse_reuse && pcbddc->coarse_ksp) { 5710 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5711 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5712 coarse_mat_reuse = MAT_REUSE_MATRIX; 5713 } else { 5714 coarse_mat = NULL; 5715 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5716 } 5717 5718 /* creates temporary l2gmap and IS for coarse indexes */ 5719 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5720 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5721 5722 /* creates temporary MATIS object for coarse matrix */ 5723 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5724 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5725 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5726 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5727 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5728 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5729 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5731 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5732 5733 /* count "active" (i.e. with positive local size) and "void" processes */ 5734 im_active = !!(pcis->n); 5735 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5736 5737 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5738 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5739 /* full_restr : just use the receivers from the subassembling pattern */ 5740 coarse_mat_is = NULL; 5741 multilevel_allowed = PETSC_FALSE; 5742 multilevel_requested = PETSC_FALSE; 5743 full_restr = PETSC_TRUE; 5744 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5745 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5746 if (multilevel_requested) { 5747 ncoarse = active_procs/pcbddc->coarsening_ratio; 5748 restr = PETSC_FALSE; 5749 full_restr = PETSC_FALSE; 5750 } else { 5751 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5752 restr = PETSC_TRUE; 5753 full_restr = PETSC_TRUE; 5754 } 5755 ncoarse = PetscMax(1,ncoarse); 5756 if (!pcbddc->coarse_subassembling) { 5757 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5758 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5759 PetscInt psum; 5760 PetscMPIInt size; 5761 if (pcbddc->coarse_ksp) psum = 1; 5762 else psum = 0; 5763 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5764 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5765 if (ncoarse < size) have_void = PETSC_TRUE; 5766 } 5767 /* determine if we can go multilevel */ 5768 if (multilevel_requested) { 5769 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5770 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5771 } 5772 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5773 5774 /* dump subassembling pattern */ 5775 if (pcbddc->dbg_flag && multilevel_allowed) { 5776 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5777 } 5778 5779 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5780 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5781 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5782 const PetscInt *idxs; 5783 ISLocalToGlobalMapping tmap; 5784 5785 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5786 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5787 /* allocate space for temporary storage */ 5788 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5789 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5790 /* allocate for IS array */ 5791 nisdofs = pcbddc->n_ISForDofsLocal; 5792 nisneu = !!pcbddc->NeumannBoundariesLocal; 5793 nisvert = 0; /* nisvert is not used */ 5794 nis = nisdofs + nisneu + nisvert; 5795 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5796 /* dofs splitting */ 5797 for (i=0;i<nisdofs;i++) { 5798 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5799 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5800 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5801 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5802 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5803 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5804 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5805 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5806 } 5807 /* neumann boundaries */ 5808 if (pcbddc->NeumannBoundariesLocal) { 5809 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5810 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5811 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5812 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5813 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5814 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5815 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5816 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5817 } 5818 /* free memory */ 5819 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5820 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5821 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5822 } else { 5823 nis = 0; 5824 nisdofs = 0; 5825 nisneu = 0; 5826 nisvert = 0; 5827 isarray = NULL; 5828 } 5829 /* destroy no longer needed map */ 5830 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5831 5832 /* subassemble */ 5833 if (multilevel_allowed) { 5834 PetscBool reuse,reuser; 5835 if (coarse_mat) reuse = PETSC_TRUE; 5836 else reuse = PETSC_FALSE; 5837 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5838 if (reuser) { 5839 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5840 } else { 5841 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5842 } 5843 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5844 } else { 5845 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5846 } 5847 if (coarse_mat_is || coarse_mat) { 5848 PetscMPIInt size; 5849 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5850 if (!multilevel_allowed) { 5851 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5852 } else { 5853 Mat A; 5854 5855 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5856 if (coarse_mat_is) { 5857 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5858 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5859 coarse_mat = coarse_mat_is; 5860 } 5861 /* be sure we don't have MatSeqDENSE as local mat */ 5862 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5863 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5864 } 5865 } 5866 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5867 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5868 5869 /* create local to global scatters for coarse problem */ 5870 if (compute_vecs) { 5871 PetscInt lrows; 5872 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5873 if (coarse_mat) { 5874 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5875 } else { 5876 lrows = 0; 5877 } 5878 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5879 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5880 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5881 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5882 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5883 } 5884 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5885 5886 /* set defaults for coarse KSP and PC */ 5887 if (multilevel_allowed) { 5888 coarse_ksp_type = KSPRICHARDSON; 5889 coarse_pc_type = PCBDDC; 5890 } else { 5891 coarse_ksp_type = KSPPREONLY; 5892 coarse_pc_type = PCREDUNDANT; 5893 } 5894 5895 /* print some info if requested */ 5896 if (pcbddc->dbg_flag) { 5897 if (!multilevel_allowed) { 5898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5899 if (multilevel_requested) { 5900 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5901 } else if (pcbddc->max_levels) { 5902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5903 } 5904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5905 } 5906 } 5907 5908 /* create the coarse KSP object only once with defaults */ 5909 if (coarse_mat) { 5910 PetscViewer dbg_viewer = NULL; 5911 if (pcbddc->dbg_flag) { 5912 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5913 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5914 } 5915 if (!pcbddc->coarse_ksp) { 5916 char prefix[256],str_level[16]; 5917 size_t len; 5918 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5919 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5920 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5921 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5922 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5923 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5924 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5925 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5926 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5927 /* prefix */ 5928 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5929 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5930 if (!pcbddc->current_level) { 5931 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5932 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5933 } else { 5934 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5935 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5936 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5937 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5938 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5939 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5940 } 5941 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5942 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5943 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5944 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5945 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5946 /* allow user customization */ 5947 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5948 } 5949 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5950 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5951 if (nisdofs) { 5952 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5953 for (i=0;i<nisdofs;i++) { 5954 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5955 } 5956 } 5957 if (nisneu) { 5958 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5959 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5960 } 5961 if (nisvert) { 5962 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5963 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5964 } 5965 5966 /* get some info after set from options */ 5967 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5968 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5969 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5970 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5971 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5972 isbddc = PETSC_FALSE; 5973 } 5974 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5975 if (isredundant) { 5976 KSP inner_ksp; 5977 PC inner_pc; 5978 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5979 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5980 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5981 } 5982 5983 /* parameters which miss an API */ 5984 if (isbddc) { 5985 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5986 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5987 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5988 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5989 if (pcbddc_coarse->benign_saddle_point) { 5990 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5991 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5992 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5993 } 5994 } 5995 5996 /* propagate symmetry info of coarse matrix */ 5997 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5998 if (pc->pmat->symmetric_set) { 5999 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6000 } 6001 if (pc->pmat->hermitian_set) { 6002 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6003 } 6004 if (pc->pmat->spd_set) { 6005 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6006 } 6007 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6008 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6009 } 6010 /* set operators */ 6011 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6012 if (pcbddc->dbg_flag) { 6013 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6014 } 6015 } 6016 ierr = PetscFree(isarray);CHKERRQ(ierr); 6017 #if 0 6018 { 6019 PetscViewer viewer; 6020 char filename[256]; 6021 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6022 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6023 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6024 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6025 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6026 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6027 } 6028 #endif 6029 6030 if (pcbddc->coarse_ksp) { 6031 Vec crhs,csol; 6032 6033 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6034 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6035 if (!csol) { 6036 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6037 } 6038 if (!crhs) { 6039 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6040 } 6041 } 6042 6043 /* compute null space for coarse solver if the benign trick has been requested */ 6044 if (pcbddc->benign_null) { 6045 6046 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6047 for (i=0;i<pcbddc->benign_n;i++) { 6048 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6049 } 6050 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6051 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6052 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6053 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6054 if (coarse_mat) { 6055 Vec nullv; 6056 PetscScalar *array,*array2; 6057 PetscInt nl; 6058 6059 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6060 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6061 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6062 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6063 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6064 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6065 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6066 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6067 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6068 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6069 } 6070 } 6071 6072 if (pcbddc->coarse_ksp) { 6073 PetscBool ispreonly; 6074 6075 if (CoarseNullSpace) { 6076 PetscBool isnull; 6077 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6078 if (isnull) { 6079 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6080 } 6081 /* TODO: add local nullspaces (if any) */ 6082 } 6083 /* setup coarse ksp */ 6084 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6085 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6086 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6087 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6088 KSP check_ksp; 6089 KSPType check_ksp_type; 6090 PC check_pc; 6091 Vec check_vec,coarse_vec; 6092 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6093 PetscInt its; 6094 PetscBool compute_eigs; 6095 PetscReal *eigs_r,*eigs_c; 6096 PetscInt neigs; 6097 const char *prefix; 6098 6099 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6100 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6101 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6102 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6103 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6104 /* prevent from setup unneeded object */ 6105 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6106 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6107 if (ispreonly) { 6108 check_ksp_type = KSPPREONLY; 6109 compute_eigs = PETSC_FALSE; 6110 } else { 6111 check_ksp_type = KSPGMRES; 6112 compute_eigs = PETSC_TRUE; 6113 } 6114 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6115 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6116 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6117 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6118 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6119 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6120 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6121 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6122 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6123 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6124 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6125 /* create random vec */ 6126 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6127 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6128 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6129 /* solve coarse problem */ 6130 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6131 /* set eigenvalue estimation if preonly has not been requested */ 6132 if (compute_eigs) { 6133 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6134 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6135 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6136 lambda_max = eigs_r[neigs-1]; 6137 lambda_min = eigs_r[0]; 6138 if (pcbddc->use_coarse_estimates) { 6139 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6140 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6141 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6142 } 6143 } 6144 } 6145 6146 /* check coarse problem residual error */ 6147 if (pcbddc->dbg_flag) { 6148 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6149 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6150 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6151 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6152 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6153 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6154 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6155 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6156 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6157 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6158 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6159 if (CoarseNullSpace) { 6160 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6161 } 6162 if (compute_eigs) { 6163 PetscReal lambda_max_s,lambda_min_s; 6164 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6165 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6166 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6167 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6168 for (i=0;i<neigs;i++) { 6169 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6170 } 6171 } 6172 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6173 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6174 } 6175 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6176 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6177 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6178 if (compute_eigs) { 6179 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6180 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6181 } 6182 } 6183 } 6184 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6185 /* print additional info */ 6186 if (pcbddc->dbg_flag) { 6187 /* waits until all processes reaches this point */ 6188 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6189 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6190 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6191 } 6192 6193 /* free memory */ 6194 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6195 PetscFunctionReturn(0); 6196 } 6197 6198 #undef __FUNCT__ 6199 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6200 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6201 { 6202 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6203 PC_IS* pcis = (PC_IS*)pc->data; 6204 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6205 IS subset,subset_mult,subset_n; 6206 PetscInt local_size,coarse_size=0; 6207 PetscInt *local_primal_indices=NULL; 6208 const PetscInt *t_local_primal_indices; 6209 PetscErrorCode ierr; 6210 6211 PetscFunctionBegin; 6212 /* Compute global number of coarse dofs */ 6213 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6214 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6215 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6216 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6217 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6218 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6219 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6220 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6221 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6222 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6223 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6224 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6225 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6226 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6227 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6228 6229 /* check numbering */ 6230 if (pcbddc->dbg_flag) { 6231 PetscScalar coarsesum,*array,*array2; 6232 PetscInt i; 6233 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6234 6235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6238 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6239 /* counter */ 6240 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6241 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6242 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6244 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6247 for (i=0;i<pcbddc->local_primal_size;i++) { 6248 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6249 } 6250 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6251 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6252 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6253 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6254 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6255 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6256 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6257 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6258 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6259 for (i=0;i<pcis->n;i++) { 6260 if (array[i] != 0.0 && array[i] != array2[i]) { 6261 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6262 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6263 set_error = PETSC_TRUE; 6264 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6266 } 6267 } 6268 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6269 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6271 for (i=0;i<pcis->n;i++) { 6272 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6273 } 6274 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6275 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6276 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6277 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6278 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6280 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6281 PetscInt *gidxs; 6282 6283 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6284 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6286 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6288 for (i=0;i<pcbddc->local_primal_size;i++) { 6289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6290 } 6291 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6292 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6293 } 6294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6295 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6296 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6297 } 6298 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6299 /* get back data */ 6300 *coarse_size_n = coarse_size; 6301 *local_primal_indices_n = local_primal_indices; 6302 PetscFunctionReturn(0); 6303 } 6304 6305 #undef __FUNCT__ 6306 #define __FUNCT__ "PCBDDCGlobalToLocal" 6307 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6308 { 6309 IS localis_t; 6310 PetscInt i,lsize,*idxs,n; 6311 PetscScalar *vals; 6312 PetscErrorCode ierr; 6313 6314 PetscFunctionBegin; 6315 /* get indices in local ordering exploiting local to global map */ 6316 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6317 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6318 for (i=0;i<lsize;i++) vals[i] = 1.0; 6319 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6320 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6321 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6322 if (idxs) { /* multilevel guard */ 6323 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6324 } 6325 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6326 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6327 ierr = PetscFree(vals);CHKERRQ(ierr); 6328 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6329 /* now compute set in local ordering */ 6330 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6331 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6332 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6333 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6334 for (i=0,lsize=0;i<n;i++) { 6335 if (PetscRealPart(vals[i]) > 0.5) { 6336 lsize++; 6337 } 6338 } 6339 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6340 for (i=0,lsize=0;i<n;i++) { 6341 if (PetscRealPart(vals[i]) > 0.5) { 6342 idxs[lsize++] = i; 6343 } 6344 } 6345 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6346 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6347 *localis = localis_t; 6348 PetscFunctionReturn(0); 6349 } 6350 6351 #undef __FUNCT__ 6352 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6353 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6354 { 6355 PC_IS *pcis=(PC_IS*)pc->data; 6356 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6357 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6358 Mat S_j; 6359 PetscInt *used_xadj,*used_adjncy; 6360 PetscBool free_used_adj; 6361 PetscErrorCode ierr; 6362 6363 PetscFunctionBegin; 6364 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6365 free_used_adj = PETSC_FALSE; 6366 if (pcbddc->sub_schurs_layers == -1) { 6367 used_xadj = NULL; 6368 used_adjncy = NULL; 6369 } else { 6370 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6371 used_xadj = pcbddc->mat_graph->xadj; 6372 used_adjncy = pcbddc->mat_graph->adjncy; 6373 } else if (pcbddc->computed_rowadj) { 6374 used_xadj = pcbddc->mat_graph->xadj; 6375 used_adjncy = pcbddc->mat_graph->adjncy; 6376 } else { 6377 PetscBool flg_row=PETSC_FALSE; 6378 const PetscInt *xadj,*adjncy; 6379 PetscInt nvtxs; 6380 6381 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6382 if (flg_row) { 6383 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6384 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6385 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6386 free_used_adj = PETSC_TRUE; 6387 } else { 6388 pcbddc->sub_schurs_layers = -1; 6389 used_xadj = NULL; 6390 used_adjncy = NULL; 6391 } 6392 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6393 } 6394 } 6395 6396 /* setup sub_schurs data */ 6397 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6398 if (!sub_schurs->schur_explicit) { 6399 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6400 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6401 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6402 } else { 6403 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6404 PetscBool isseqaij,need_change = PETSC_FALSE;; 6405 PetscInt benign_n; 6406 Mat change = NULL; 6407 Vec scaling = NULL; 6408 IS change_primal = NULL; 6409 6410 if (!pcbddc->use_vertices && reuse_solvers) { 6411 PetscInt n_vertices; 6412 6413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6414 reuse_solvers = (PetscBool)!n_vertices; 6415 } 6416 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6417 if (!isseqaij) { 6418 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6419 if (matis->A == pcbddc->local_mat) { 6420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6421 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6422 } else { 6423 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6424 } 6425 } 6426 if (!pcbddc->benign_change_explicit) { 6427 benign_n = pcbddc->benign_n; 6428 } else { 6429 benign_n = 0; 6430 } 6431 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6432 We need a global reduction to avoid possible deadlocks. 6433 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6434 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6435 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6436 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6437 need_change = (PetscBool)(!need_change); 6438 } 6439 /* If the user defines additional constraints, we import them here. 6440 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6441 if (need_change) { 6442 PC_IS *pcisf; 6443 PC_BDDC *pcbddcf; 6444 PC pcf; 6445 6446 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6447 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6448 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6449 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6450 /* hacks */ 6451 pcisf = (PC_IS*)pcf->data; 6452 pcisf->is_B_local = pcis->is_B_local; 6453 pcisf->vec1_N = pcis->vec1_N; 6454 pcisf->BtoNmap = pcis->BtoNmap; 6455 pcisf->n = pcis->n; 6456 pcisf->n_B = pcis->n_B; 6457 pcbddcf = (PC_BDDC*)pcf->data; 6458 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6459 pcbddcf->mat_graph = pcbddc->mat_graph; 6460 pcbddcf->use_faces = PETSC_TRUE; 6461 pcbddcf->use_change_of_basis = PETSC_TRUE; 6462 pcbddcf->use_change_on_faces = PETSC_TRUE; 6463 pcbddcf->use_qr_single = PETSC_TRUE; 6464 pcbddcf->fake_change = PETSC_TRUE; 6465 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6466 /* store information on primal vertices and change of basis (in local numbering) */ 6467 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6468 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6469 change = pcbddcf->ConstraintMatrix; 6470 pcbddcf->ConstraintMatrix = NULL; 6471 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6472 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6473 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6474 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6475 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6476 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6477 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6478 pcf->ops->destroy = NULL; 6479 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6480 } 6481 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6482 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6483 ierr = MatDestroy(&change);CHKERRQ(ierr); 6484 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6485 } 6486 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6487 6488 /* free adjacency */ 6489 if (free_used_adj) { 6490 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6491 } 6492 PetscFunctionReturn(0); 6493 } 6494 6495 #undef __FUNCT__ 6496 #define __FUNCT__ "PCBDDCInitSubSchurs" 6497 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6498 { 6499 PC_IS *pcis=(PC_IS*)pc->data; 6500 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6501 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6502 PCBDDCGraph graph; 6503 PetscErrorCode ierr; 6504 6505 PetscFunctionBegin; 6506 /* attach interface graph for determining subsets */ 6507 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6508 IS verticesIS,verticescomm; 6509 PetscInt vsize,*idxs; 6510 6511 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6512 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6513 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6514 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6515 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6516 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6517 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6518 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6519 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6520 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6521 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6522 } else { 6523 graph = pcbddc->mat_graph; 6524 } 6525 /* print some info */ 6526 if (pcbddc->dbg_flag) { 6527 IS vertices; 6528 PetscInt nv,nedges,nfaces; 6529 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6530 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6531 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6532 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6533 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6536 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6538 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6539 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6540 } 6541 6542 /* sub_schurs init */ 6543 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6544 6545 /* free graph struct */ 6546 if (pcbddc->sub_schurs_rebuild) { 6547 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6548 } 6549 PetscFunctionReturn(0); 6550 } 6551 6552 #undef __FUNCT__ 6553 #define __FUNCT__ "PCBDDCCheckOperator" 6554 PetscErrorCode PCBDDCCheckOperator(PC pc) 6555 { 6556 PC_IS *pcis=(PC_IS*)pc->data; 6557 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6558 PetscErrorCode ierr; 6559 6560 PetscFunctionBegin; 6561 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6562 IS zerodiag = NULL; 6563 Mat S_j,B0_B=NULL; 6564 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6565 PetscScalar *p0_check,*array,*array2; 6566 PetscReal norm; 6567 PetscInt i; 6568 6569 /* B0 and B0_B */ 6570 if (zerodiag) { 6571 IS dummy; 6572 6573 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6574 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6575 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6576 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6577 } 6578 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6579 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6580 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6581 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6582 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6583 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6584 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6585 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6586 /* S_j */ 6587 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6588 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6589 6590 /* mimic vector in \widetilde{W}_\Gamma */ 6591 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6592 /* continuous in primal space */ 6593 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6594 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6595 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6596 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6597 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6598 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6599 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6600 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6601 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6602 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6603 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6604 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6605 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6606 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6607 6608 /* assemble rhs for coarse problem */ 6609 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6610 /* local with Schur */ 6611 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6612 if (zerodiag) { 6613 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6614 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6615 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6616 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6617 } 6618 /* sum on primal nodes the local contributions */ 6619 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6620 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6621 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6622 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6623 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6624 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6625 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6626 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6627 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6628 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6629 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6630 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6631 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6632 /* scale primal nodes (BDDC sums contibutions) */ 6633 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6634 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6635 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6636 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6637 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6638 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6639 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6640 /* global: \widetilde{B0}_B w_\Gamma */ 6641 if (zerodiag) { 6642 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6643 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6644 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6645 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6646 } 6647 /* BDDC */ 6648 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6649 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6650 6651 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6652 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6653 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6654 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6655 for (i=0;i<pcbddc->benign_n;i++) { 6656 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6657 } 6658 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6659 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6660 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6661 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6662 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6663 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6664 } 6665 PetscFunctionReturn(0); 6666 } 6667