1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 36 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 37 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 38 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 39 } 40 } 41 } 42 43 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 44 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 45 } 46 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 51 } 52 ierr = VecDestroy(&global);CHKERRQ(ierr); 53 ierr = VecDestroy(&local);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 #undef __FUNCT__ 58 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 59 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 60 { 61 PC_IS *pcis = (PC_IS*)(pc->data); 62 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 if (!pcbddc->benign_have_null) { 67 PetscFunctionReturn(0); 68 } 69 if (pcbddc->ChangeOfBasisMatrix) { 70 Vec swap; 71 72 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 73 swap = pcbddc->work_change; 74 pcbddc->work_change = r; 75 r = swap; 76 } 77 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79 if (pcbddc->benign_n) { 80 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 81 } else { 82 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 83 } 84 ierr = VecSet(z,0.);CHKERRQ(ierr); 85 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 86 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 87 if (pcbddc->ChangeOfBasisMatrix) { 88 Vec swap; 89 90 swap = r; 91 r = pcbddc->work_change; 92 pcbddc->work_change = swap; 93 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 94 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 101 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 102 { 103 PCBDDCBenignMatMult_ctx ctx; 104 PetscErrorCode ierr; 105 PetscBool apply_right,apply_left,reset_x; 106 107 PetscFunctionBegin; 108 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 109 if (transpose) { 110 apply_right = ctx->apply_left; 111 apply_left = ctx->apply_right; 112 } else { 113 apply_right = ctx->apply_right; 114 apply_left = ctx->apply_left; 115 } 116 reset_x = PETSC_FALSE; 117 if (apply_right) { 118 const PetscScalar *ax; 119 PetscInt nl,i; 120 121 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 122 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 123 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 125 for (i=0;i<ctx->benign_n;i++) { 126 PetscScalar sum,val; 127 const PetscInt *idxs; 128 PetscInt nz,j; 129 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 130 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 131 sum = 0.; 132 if (ctx->apply_p0) { 133 val = ctx->work[idxs[nz-1]]; 134 for (j=0;j<nz-1;j++) { 135 sum += ctx->work[idxs[j]]; 136 ctx->work[idxs[j]] += val; 137 } 138 } else { 139 for (j=0;j<nz-1;j++) { 140 sum += ctx->work[idxs[j]]; 141 } 142 } 143 ctx->work[idxs[nz-1]] -= sum; 144 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 145 } 146 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 147 reset_x = PETSC_TRUE; 148 } 149 if (transpose) { 150 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 151 } else { 152 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 153 } 154 if (reset_x) { 155 ierr = VecResetArray(x);CHKERRQ(ierr); 156 } 157 if (apply_left) { 158 PetscScalar *ay; 159 PetscInt i; 160 161 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 162 for (i=0;i<ctx->benign_n;i++) { 163 PetscScalar sum,val; 164 const PetscInt *idxs; 165 PetscInt nz,j; 166 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 167 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 168 val = -ay[idxs[nz-1]]; 169 if (ctx->apply_p0) { 170 sum = 0.; 171 for (j=0;j<nz-1;j++) { 172 sum += ay[idxs[j]]; 173 ay[idxs[j]] += val; 174 } 175 ay[idxs[nz-1]] += sum; 176 } else { 177 for (j=0;j<nz-1;j++) { 178 ay[idxs[j]] += val; 179 } 180 ay[idxs[nz-1]] = 0.; 181 } 182 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 183 } 184 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 191 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 192 { 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 202 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 203 { 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCBDDCBenignShellMat" 213 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 214 { 215 PC_IS *pcis = (PC_IS*)pc->data; 216 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 217 PCBDDCBenignMatMult_ctx ctx; 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 if (!restore) { 222 Mat A_IB,A_BI; 223 PetscScalar *work; 224 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 225 226 if (pcbddc->benign_original_mat) { 227 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 228 } 229 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 230 PetscFunctionReturn(0); 231 } 232 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 233 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 234 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 235 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 236 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 237 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 238 ierr = PetscNew(&ctx);CHKERRQ(ierr); 239 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 240 ctx->apply_left = PETSC_TRUE; 241 ctx->apply_right = PETSC_FALSE; 242 ctx->apply_p0 = PETSC_FALSE; 243 ctx->benign_n = pcbddc->benign_n; 244 if (reuse) { 245 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 246 ctx->free = PETSC_FALSE; 247 } else { /* TODO: could be optimized for successive solves */ 248 ISLocalToGlobalMapping N_to_D; 249 PetscInt i; 250 251 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 252 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 253 for (i=0;i<pcbddc->benign_n;i++) { 254 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 255 } 256 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 257 ctx->free = PETSC_TRUE; 258 } 259 ctx->A = pcis->A_IB; 260 ctx->work = work; 261 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 262 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 263 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 264 pcis->A_IB = A_IB; 265 266 /* A_BI as A_IB^T */ 267 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 268 pcbddc->benign_original_mat = pcis->A_BI; 269 pcis->A_BI = A_BI; 270 } else { 271 if (!pcbddc->benign_original_mat) { 272 PetscFunctionReturn(0); 273 } 274 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 275 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 276 pcis->A_IB = ctx->A; 277 ctx->A = NULL; 278 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 279 pcis->A_BI = pcbddc->benign_original_mat; 280 pcbddc->benign_original_mat = NULL; 281 if (ctx->free) { 282 PetscInt i; 283 for (i=0;i<ctx->benign_n;i++) { 284 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 285 } 286 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 287 } 288 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 289 ierr = PetscFree(ctx);CHKERRQ(ierr); 290 } 291 PetscFunctionReturn(0); 292 } 293 294 /* used just in bddc debug mode */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCBenignProject" 297 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 298 { 299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 300 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 301 Mat An; 302 PetscErrorCode ierr; 303 304 PetscFunctionBegin; 305 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 306 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 307 if (is1) { 308 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 309 ierr = MatDestroy(&An);CHKERRQ(ierr); 310 } else { 311 *B = An; 312 } 313 PetscFunctionReturn(0); 314 } 315 316 /* TODO: add reuse flag */ 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatSeqAIJCompress" 319 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 320 { 321 Mat Bt; 322 PetscScalar *a,*bdata; 323 const PetscInt *ii,*ij; 324 PetscInt m,n,i,nnz,*bii,*bij; 325 PetscBool flg_row; 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 330 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 331 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 332 nnz = n; 333 for (i=0;i<ii[n];i++) { 334 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 335 } 336 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 337 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 338 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 339 nnz = 0; 340 bii[0] = 0; 341 for (i=0;i<n;i++) { 342 PetscInt j; 343 for (j=ii[i];j<ii[i+1];j++) { 344 PetscScalar entry = a[j]; 345 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 346 bij[nnz] = ij[j]; 347 bdata[nnz] = entry; 348 nnz++; 349 } 350 } 351 bii[i+1] = nnz; 352 } 353 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 354 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 355 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 356 { 357 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 358 b->free_a = PETSC_TRUE; 359 b->free_ij = PETSC_TRUE; 360 } 361 *B = Bt; 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatDetectDisconnectedComponents" 367 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 368 { 369 Mat B; 370 IS is_dummy,*cc_n; 371 ISLocalToGlobalMapping l2gmap_dummy; 372 PCBDDCGraph graph; 373 PetscInt i,n; 374 PetscInt *xadj,*adjncy; 375 PetscInt *xadj_filtered,*adjncy_filtered; 376 PetscBool flg_row,isseqaij; 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 if (!A->rmap->N || !A->cmap->N) { 381 *ncc = 0; 382 *cc = NULL; 383 PetscFunctionReturn(0); 384 } 385 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 386 if (!isseqaij && filter) { 387 PetscBool isseqdense; 388 389 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 390 if (!isseqdense) { 391 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 392 } else { /* TODO: rectangular case and LDA */ 393 PetscScalar *array; 394 PetscReal chop=1.e-6; 395 396 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 398 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 399 for (i=0;i<n;i++) { 400 PetscInt j; 401 for (j=i+1;j<n;j++) { 402 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 403 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 404 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 405 } 406 } 407 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 408 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 409 } 410 } else { 411 B = A; 412 } 413 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 414 415 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 416 if (filter) { 417 PetscScalar *data; 418 PetscInt j,cum; 419 420 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 421 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 422 cum = 0; 423 for (i=0;i<n;i++) { 424 PetscInt t; 425 426 for (j=xadj[i];j<xadj[i+1];j++) { 427 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 428 continue; 429 } 430 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 431 } 432 t = xadj_filtered[i]; 433 xadj_filtered[i] = cum; 434 cum += t; 435 } 436 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 437 } else { 438 xadj_filtered = NULL; 439 adjncy_filtered = NULL; 440 } 441 442 /* compute local connected components using PCBDDCGraph */ 443 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 444 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 445 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 446 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 447 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 448 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 449 if (xadj_filtered) { 450 graph->xadj = xadj_filtered; 451 graph->adjncy = adjncy_filtered; 452 } else { 453 graph->xadj = xadj; 454 graph->adjncy = adjncy; 455 } 456 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 457 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 458 /* partial clean up */ 459 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 460 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 461 if (A != B) { 462 ierr = MatDestroy(&B);CHKERRQ(ierr); 463 } 464 465 /* get back data */ 466 if (ncc) *ncc = graph->ncc; 467 if (cc) { 468 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 469 for (i=0;i<graph->ncc;i++) { 470 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 471 } 472 *cc = cc_n; 473 } 474 /* clean up graph */ 475 graph->xadj = 0; 476 graph->adjncy = 0; 477 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCBenignCheck" 483 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 484 { 485 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 486 PC_IS* pcis = (PC_IS*)(pc->data); 487 IS dirIS = NULL; 488 PetscInt i; 489 PetscErrorCode ierr; 490 491 PetscFunctionBegin; 492 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 493 if (zerodiag) { 494 Mat A; 495 Vec vec3_N; 496 PetscScalar *vals; 497 const PetscInt *idxs; 498 PetscInt nz,*count; 499 500 /* p0 */ 501 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 502 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 503 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 504 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 505 for (i=0;i<nz;i++) vals[i] = 1.; 506 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 508 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 509 /* v_I */ 510 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 511 for (i=0;i<nz;i++) vals[i] = 0.; 512 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 514 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 515 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 516 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 if (dirIS) { 519 PetscInt n; 520 521 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 522 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 523 for (i=0;i<n;i++) vals[i] = 0.; 524 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 525 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 526 } 527 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 529 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 530 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 531 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 532 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 533 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 534 if (PetscAbsScalar(vals[0]) > 1.e-1) { 535 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 536 } 537 ierr = PetscFree(vals);CHKERRQ(ierr); 538 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 539 540 /* there should not be any pressure dofs lying on the interface */ 541 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 542 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 543 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 544 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 545 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 546 for (i=0;i<nz;i++) { 547 if (count[idxs[i]]) { 548 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 549 } 550 } 551 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 552 ierr = PetscFree(count);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 555 556 /* check PCBDDCBenignGetOrSetP0 */ 557 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 558 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 559 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 560 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 561 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 562 for (i=0;i<pcbddc->benign_n;i++) { 563 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 564 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 572 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 573 { 574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 575 IS pressures,zerodiag,*zerodiag_subs; 576 PetscInt nz,n; 577 PetscInt *interior_dofs,n_interior_dofs; 578 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 579 PetscErrorCode ierr; 580 581 PetscFunctionBegin; 582 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 584 for (n=0;n<pcbddc->benign_n;n++) { 585 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 586 } 587 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 588 pcbddc->benign_n = 0; 589 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 590 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 591 Checks if all the pressure dofs in each subdomain have a zero diagonal 592 If not, a change of basis on pressures is not needed 593 since the local Schur complements are SPD 594 */ 595 has_null_pressures = PETSC_TRUE; 596 have_null = PETSC_TRUE; 597 if (pcbddc->n_ISForDofsLocal) { 598 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 599 600 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 601 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 602 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 603 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 605 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 606 if (!sorted) { 607 ierr = ISSort(pressures);CHKERRQ(ierr); 608 } 609 } else { 610 pressures = NULL; 611 } 612 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 613 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 614 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 615 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 616 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 617 if (!sorted) { 618 ierr = ISSort(zerodiag);CHKERRQ(ierr); 619 } 620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 621 if (!nz) { 622 if (n) have_null = PETSC_FALSE; 623 has_null_pressures = PETSC_FALSE; 624 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 625 } 626 recompute_zerodiag = PETSC_FALSE; 627 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 628 zerodiag_subs = NULL; 629 pcbddc->benign_n = 0; 630 n_interior_dofs = 0; 631 interior_dofs = NULL; 632 if (pcbddc->current_level) { /* need to compute interior nodes */ 633 PetscInt n,i,j; 634 PetscInt n_neigh,*neigh,*n_shared,**shared; 635 PetscInt *iwork; 636 637 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 638 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 639 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 640 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 641 for (i=0;i<n_neigh;i++) 642 for (j=0;j<n_shared[i];j++) 643 iwork[shared[i][j]] += 1; 644 for (i=0;i<n;i++) 645 if (!iwork[i]) 646 interior_dofs[n_interior_dofs++] = i; 647 ierr = PetscFree(iwork);CHKERRQ(ierr); 648 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 649 } 650 if (has_null_pressures) { 651 IS *subs; 652 PetscInt nsubs,i,j,nl; 653 const PetscInt *idxs; 654 PetscScalar *array; 655 Vec *work; 656 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 657 658 subs = pcbddc->local_subs; 659 nsubs = pcbddc->n_local_subs; 660 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 661 if (pcbddc->current_level) { 662 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 663 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 664 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 665 /* work[0] = 1_p */ 666 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 667 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 668 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 669 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 670 /* work[0] = 1_v */ 671 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 672 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 673 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 674 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 675 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 676 } 677 if (nsubs > 1) { 678 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 679 for (i=0;i<nsubs;i++) { 680 ISLocalToGlobalMapping l2g; 681 IS t_zerodiag_subs; 682 PetscInt nl; 683 684 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 685 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 686 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 687 if (nl) { 688 PetscBool valid = PETSC_TRUE; 689 690 if (pcbddc->current_level) { 691 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 692 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 693 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 694 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 695 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 696 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 697 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 698 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 700 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 701 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 702 for (j=0;j<n_interior_dofs;j++) { 703 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 704 valid = PETSC_FALSE; 705 break; 706 } 707 } 708 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 709 } 710 if (valid && pcbddc->NeumannBoundariesLocal) { 711 IS t_bc; 712 PetscInt nzb; 713 714 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 715 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 716 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 717 if (nzb) valid = PETSC_FALSE; 718 } 719 if (valid && pressures) { 720 IS t_pressure_subs; 721 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 722 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 723 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 724 } 725 if (valid) { 726 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 727 pcbddc->benign_n++; 728 } else { 729 recompute_zerodiag = PETSC_TRUE; 730 } 731 } 732 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 733 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 734 } 735 } else { /* there's just one subdomain (or zero if they have not been detected */ 736 PetscBool valid = PETSC_TRUE; 737 738 if (pcbddc->NeumannBoundariesLocal) { 739 PetscInt nzb; 740 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 741 if (nzb) valid = PETSC_FALSE; 742 } 743 if (valid && pressures) { 744 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 745 } 746 if (valid && pcbddc->current_level) { 747 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 748 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 749 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 750 for (j=0;j<n_interior_dofs;j++) { 751 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 752 valid = PETSC_FALSE; 753 break; 754 } 755 } 756 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 757 } 758 if (valid) { 759 pcbddc->benign_n = 1; 760 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 761 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 762 zerodiag_subs[0] = zerodiag; 763 } 764 } 765 if (pcbddc->current_level) { 766 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 767 } 768 } 769 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 770 771 if (!pcbddc->benign_n) { 772 PetscInt n; 773 774 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 775 recompute_zerodiag = PETSC_FALSE; 776 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 777 if (n) { 778 has_null_pressures = PETSC_FALSE; 779 have_null = PETSC_FALSE; 780 } 781 } 782 783 /* final check for null pressures */ 784 if (zerodiag && pressures) { 785 PetscInt nz,np; 786 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 787 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 788 if (nz != np) have_null = PETSC_FALSE; 789 } 790 791 if (recompute_zerodiag) { 792 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 793 if (pcbddc->benign_n == 1) { 794 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 795 zerodiag = zerodiag_subs[0]; 796 } else { 797 PetscInt i,nzn,*new_idxs; 798 799 nzn = 0; 800 for (i=0;i<pcbddc->benign_n;i++) { 801 PetscInt ns; 802 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 803 nzn += ns; 804 } 805 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 806 nzn = 0; 807 for (i=0;i<pcbddc->benign_n;i++) { 808 PetscInt ns,*idxs; 809 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 810 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 811 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 812 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 813 nzn += ns; 814 } 815 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 817 } 818 have_null = PETSC_FALSE; 819 } 820 821 /* no-net-flux */ 822 if (pcbddc->benign_compute_nonetflux) { 823 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 824 MatNullSpace near_null_space; 825 ISLocalToGlobalMapping rmap; 826 Vec quad_vec; 827 PetscScalar *pvals; 828 PetscInt i,np,*dummyins; 829 IS isused = NULL; 830 PetscBool participate = PETSC_TRUE; 831 832 /* create vector to hold quadrature weights */ 833 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 834 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 835 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 836 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 837 838 /* compute B^{(i)T} * 1_p */ 839 np = 0; 840 if (pressures) { 841 isused = pressures; 842 } else { 843 isused = zerodiag; 844 } 845 if (isused) { 846 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 847 } 848 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 849 for (i=0;i<np;i++) pvals[i] = 1.; 850 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 851 if (np) { 852 const PetscInt *pidxs; 853 854 if (isused) { 855 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 856 } 857 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 858 if (isused) { 859 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 860 } 861 } 862 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 863 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 864 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 865 ierr = PetscFree(pvals);CHKERRQ(ierr); 866 if (!isused) participate = PETSC_FALSE; 867 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 868 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 869 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 870 for (i=0;i<matis->sf->nleaves;i++) 871 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 872 else matis->sf_leafdata[i] = -1; 873 874 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 875 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 876 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 877 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 878 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 879 for (i=0;i<matis->sf->nleaves;i++) { 880 if (PetscGlobalRank != matis->sf_leafdata[i]) { 881 pvals[i] = 0.; 882 } 883 } 884 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 885 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 886 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 887 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 888 ierr = PetscFree(dummyins);CHKERRQ(ierr); 889 890 /* assembly quadrature vec and attach near null space to pmat */ 891 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 892 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 893 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 894 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 895 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 896 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 897 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 898 } 899 900 /* change of basis and p0 dofs */ 901 if (has_null_pressures) { 902 IS zerodiagc; 903 const PetscInt *idxs,*idxsc; 904 PetscInt i,s,*nnz; 905 906 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 907 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 908 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 909 /* local change of basis for pressures */ 910 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 911 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 912 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 913 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 914 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 915 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 916 for (i=0;i<pcbddc->benign_n;i++) { 917 PetscInt nzs,j; 918 919 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 920 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 921 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 922 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 923 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 } 925 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 926 ierr = PetscFree(nnz);CHKERRQ(ierr); 927 /* set identity on velocities */ 928 for (i=0;i<n-nz;i++) { 929 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 930 } 931 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 932 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 933 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 934 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 935 /* set change on pressures */ 936 for (s=0;s<pcbddc->benign_n;s++) { 937 PetscScalar *array; 938 PetscInt nzs; 939 940 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 941 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 942 for (i=0;i<nzs-1;i++) { 943 PetscScalar vals[2]; 944 PetscInt cols[2]; 945 946 cols[0] = idxs[i]; 947 cols[1] = idxs[nzs-1]; 948 vals[0] = 1.; 949 vals[1] = 1.; 950 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 951 } 952 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 953 for (i=0;i<nzs-1;i++) array[i] = -1.; 954 array[nzs-1] = 1.; 955 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 956 /* store local idxs for p0 */ 957 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 958 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 959 ierr = PetscFree(array);CHKERRQ(ierr); 960 } 961 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 963 /* project if needed */ 964 if (pcbddc->benign_change_explicit) { 965 Mat M; 966 967 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 968 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 969 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 970 ierr = MatDestroy(&M);CHKERRQ(ierr); 971 } 972 /* store global idxs for p0 */ 973 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 974 } 975 pcbddc->benign_zerodiag_subs = zerodiag_subs; 976 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 977 978 /* determines if the coarse solver will be singular or not */ 979 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 980 /* determines if the problem has subdomains with 0 pressure block */ 981 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 982 *zerodiaglocal = zerodiag; 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 988 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 989 { 990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 991 PetscScalar *array; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 if (!pcbddc->benign_sf) { 996 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 997 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 998 } 999 if (get) { 1000 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1001 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1002 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1003 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 } else { 1005 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1006 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1007 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1015 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1016 { 1017 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 /* TODO: add error checking 1022 - avoid nested pop (or push) calls. 1023 - cannot push before pop. 1024 - cannot call this if pcbddc->local_mat is NULL 1025 */ 1026 if (!pcbddc->benign_n) { 1027 PetscFunctionReturn(0); 1028 } 1029 if (pop) { 1030 if (pcbddc->benign_change_explicit) { 1031 IS is_p0; 1032 MatReuse reuse; 1033 1034 /* extract B_0 */ 1035 reuse = MAT_INITIAL_MATRIX; 1036 if (pcbddc->benign_B0) { 1037 reuse = MAT_REUSE_MATRIX; 1038 } 1039 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1041 /* remove rows and cols from local problem */ 1042 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1043 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1044 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1045 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1046 } else { 1047 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1048 PetscScalar *vals; 1049 PetscInt i,n,*idxs_ins; 1050 1051 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1052 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1053 if (!pcbddc->benign_B0) { 1054 PetscInt *nnz; 1055 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1056 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1057 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1058 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1059 for (i=0;i<pcbddc->benign_n;i++) { 1060 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1061 nnz[i] = n - nnz[i]; 1062 } 1063 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1064 ierr = PetscFree(nnz);CHKERRQ(ierr); 1065 } 1066 1067 for (i=0;i<pcbddc->benign_n;i++) { 1068 PetscScalar *array; 1069 PetscInt *idxs,j,nz,cum; 1070 1071 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1072 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1073 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1074 for (j=0;j<nz;j++) vals[j] = 1.; 1075 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1076 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1077 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1078 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1079 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1080 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1081 cum = 0; 1082 for (j=0;j<n;j++) { 1083 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1084 vals[cum] = array[j]; 1085 idxs_ins[cum] = j; 1086 cum++; 1087 } 1088 } 1089 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1091 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1092 } 1093 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1095 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1096 } 1097 } else { /* push */ 1098 if (pcbddc->benign_change_explicit) { 1099 PetscInt i; 1100 1101 for (i=0;i<pcbddc->benign_n;i++) { 1102 PetscScalar *B0_vals; 1103 PetscInt *B0_cols,B0_ncol; 1104 1105 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1106 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1107 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1108 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1109 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1110 } 1111 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1112 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 } else { 1114 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1115 } 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1122 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1123 { 1124 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1125 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1126 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1127 PetscBLASInt *B_iwork,*B_ifail; 1128 PetscScalar *work,lwork; 1129 PetscScalar *St,*S,*eigv; 1130 PetscScalar *Sarray,*Starray; 1131 PetscReal *eigs,thresh; 1132 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1133 PetscBool allocated_S_St; 1134 #if defined(PETSC_USE_COMPLEX) 1135 PetscReal *rwork; 1136 #endif 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1141 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1142 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1143 1144 if (pcbddc->dbg_flag) { 1145 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1148 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1149 } 1150 1151 if (pcbddc->dbg_flag) { 1152 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1153 } 1154 1155 /* max size of subsets */ 1156 mss = 0; 1157 for (i=0;i<sub_schurs->n_subs;i++) { 1158 PetscInt subset_size; 1159 1160 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1161 mss = PetscMax(mss,subset_size); 1162 } 1163 1164 /* min/max and threshold */ 1165 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1166 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1167 nmax = PetscMax(nmin,nmax); 1168 allocated_S_St = PETSC_FALSE; 1169 if (nmin) { 1170 allocated_S_St = PETSC_TRUE; 1171 } 1172 1173 /* allocate lapack workspace */ 1174 cum = cum2 = 0; 1175 maxneigs = 0; 1176 for (i=0;i<sub_schurs->n_subs;i++) { 1177 PetscInt n,subset_size; 1178 1179 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1180 n = PetscMin(subset_size,nmax); 1181 cum += subset_size; 1182 cum2 += subset_size*n; 1183 maxneigs = PetscMax(maxneigs,n); 1184 } 1185 if (mss) { 1186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1187 PetscBLASInt B_itype = 1; 1188 PetscBLASInt B_N = mss; 1189 PetscReal zero = 0.0; 1190 PetscReal eps = 0.0; /* dlamch? */ 1191 1192 B_lwork = -1; 1193 S = NULL; 1194 St = NULL; 1195 eigs = NULL; 1196 eigv = NULL; 1197 B_iwork = NULL; 1198 B_ifail = NULL; 1199 #if defined(PETSC_USE_COMPLEX) 1200 rwork = NULL; 1201 #endif 1202 thresh = 1.0; 1203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_COMPLEX) 1205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1206 #else 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1208 #endif 1209 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1211 } else { 1212 /* TODO */ 1213 } 1214 } else { 1215 lwork = 0; 1216 } 1217 1218 nv = 0; 1219 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1220 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1221 } 1222 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1223 if (allocated_S_St) { 1224 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1225 } 1226 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1227 #if defined(PETSC_USE_COMPLEX) 1228 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1229 #endif 1230 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1231 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1232 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1233 nv+cum,&pcbddc->adaptive_constraints_idxs, 1234 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1235 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1236 1237 maxneigs = 0; 1238 cum = cumarray = 0; 1239 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1240 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1241 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1242 const PetscInt *idxs; 1243 1244 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1245 for (cum=0;cum<nv;cum++) { 1246 pcbddc->adaptive_constraints_n[cum] = 1; 1247 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1248 pcbddc->adaptive_constraints_data[cum] = 1.0; 1249 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1250 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1251 } 1252 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1253 } 1254 1255 if (mss) { /* multilevel */ 1256 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1257 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1258 } 1259 1260 thresh = pcbddc->adaptive_threshold; 1261 for (i=0;i<sub_schurs->n_subs;i++) { 1262 const PetscInt *idxs; 1263 PetscReal upper,lower; 1264 PetscInt j,subset_size,eigs_start = 0; 1265 PetscBLASInt B_N; 1266 PetscBool same_data = PETSC_FALSE; 1267 1268 if (pcbddc->use_deluxe_scaling) { 1269 upper = PETSC_MAX_REAL; 1270 lower = thresh; 1271 } else { 1272 upper = 1./thresh; 1273 lower = 0.; 1274 } 1275 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1276 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1277 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1278 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1279 if (sub_schurs->is_hermitian) { 1280 PetscInt j,k; 1281 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1282 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1283 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1284 } 1285 for (j=0;j<subset_size;j++) { 1286 for (k=j;k<subset_size;k++) { 1287 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1288 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1289 } 1290 } 1291 } else { 1292 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1293 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1294 } 1295 } else { 1296 S = Sarray + cumarray; 1297 St = Starray + cumarray; 1298 } 1299 /* see if we can save some work */ 1300 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1301 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1302 } 1303 1304 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1305 B_neigs = 0; 1306 } else { 1307 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1308 PetscBLASInt B_itype = 1; 1309 PetscBLASInt B_IL, B_IU; 1310 PetscReal eps = -1.0; /* dlamch? */ 1311 PetscInt nmin_s; 1312 PetscBool compute_range = PETSC_FALSE; 1313 1314 if (pcbddc->dbg_flag) { 1315 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1316 } 1317 1318 compute_range = PETSC_FALSE; 1319 if (thresh > 1.+PETSC_SMALL && !same_data) { 1320 compute_range = PETSC_TRUE; 1321 } 1322 1323 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1324 if (compute_range) { 1325 1326 /* ask for eigenvalues larger than thresh */ 1327 #if defined(PETSC_USE_COMPLEX) 1328 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1329 #else 1330 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1331 #endif 1332 } else if (!same_data) { 1333 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1334 B_IL = 1; 1335 #if defined(PETSC_USE_COMPLEX) 1336 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1337 #else 1338 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1339 #endif 1340 } else { /* same_data is true, so get the adaptive function requested by the user */ 1341 PetscInt k; 1342 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1343 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1344 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1345 nmin = nmax; 1346 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1347 for (k=0;k<nmax;k++) { 1348 eigs[k] = 1./PETSC_SMALL; 1349 eigv[k*(subset_size+1)] = 1.0; 1350 } 1351 } 1352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1353 if (B_ierr) { 1354 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1355 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1356 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1357 } 1358 1359 if (B_neigs > nmax) { 1360 if (pcbddc->dbg_flag) { 1361 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1362 } 1363 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1364 B_neigs = nmax; 1365 } 1366 1367 nmin_s = PetscMin(nmin,B_N); 1368 if (B_neigs < nmin_s) { 1369 PetscBLASInt B_neigs2; 1370 1371 if (pcbddc->use_deluxe_scaling) { 1372 B_IL = B_N - nmin_s + 1; 1373 B_IU = B_N - B_neigs; 1374 } else { 1375 B_IL = B_neigs + 1; 1376 B_IU = nmin_s; 1377 } 1378 if (pcbddc->dbg_flag) { 1379 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1380 } 1381 if (sub_schurs->is_hermitian) { 1382 PetscInt j; 1383 for (j=0;j<subset_size;j++) { 1384 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1385 } 1386 for (j=0;j<subset_size;j++) { 1387 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1388 } 1389 } else { 1390 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1392 } 1393 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1394 #if defined(PETSC_USE_COMPLEX) 1395 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1396 #else 1397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1398 #endif 1399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1400 B_neigs += B_neigs2; 1401 } 1402 if (B_ierr) { 1403 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1404 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1405 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1406 } 1407 if (pcbddc->dbg_flag) { 1408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1409 for (j=0;j<B_neigs;j++) { 1410 if (eigs[j] == 0.0) { 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1412 } else { 1413 if (pcbddc->use_deluxe_scaling) { 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1415 } else { 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1417 } 1418 } 1419 } 1420 } 1421 } else { 1422 /* TODO */ 1423 } 1424 } 1425 /* change the basis back to the original one */ 1426 if (sub_schurs->change) { 1427 Mat change,phi,phit; 1428 1429 if (pcbddc->dbg_flag > 1) { 1430 PetscInt ii; 1431 for (ii=0;ii<B_neigs;ii++) { 1432 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1433 for (j=0;j<B_N;j++) { 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1435 } 1436 } 1437 } 1438 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1440 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1441 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1442 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1443 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1444 } 1445 maxneigs = PetscMax(B_neigs,maxneigs); 1446 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1447 if (B_neigs) { 1448 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1449 1450 if (pcbddc->dbg_flag > 1) { 1451 PetscInt ii; 1452 for (ii=0;ii<B_neigs;ii++) { 1453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1454 for (j=0;j<B_N;j++) { 1455 #if defined(PETSC_USE_COMPLEX) 1456 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1457 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1458 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1459 #else 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1461 #endif 1462 } 1463 } 1464 } 1465 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1466 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1467 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1468 cum++; 1469 } 1470 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1471 /* shift for next computation */ 1472 cumarray += subset_size*subset_size; 1473 } 1474 if (pcbddc->dbg_flag) { 1475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1476 } 1477 1478 if (mss) { 1479 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1480 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1481 /* destroy matrices (junk) */ 1482 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1483 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1484 } 1485 if (allocated_S_St) { 1486 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1487 } 1488 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1489 #if defined(PETSC_USE_COMPLEX) 1490 ierr = PetscFree(rwork);CHKERRQ(ierr); 1491 #endif 1492 if (pcbddc->dbg_flag) { 1493 PetscInt maxneigs_r; 1494 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1495 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "PCBDDCSetUpSolvers" 1502 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1503 { 1504 PetscScalar *coarse_submat_vals; 1505 PetscErrorCode ierr; 1506 1507 PetscFunctionBegin; 1508 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1509 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1510 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1511 1512 /* Setup local neumann solver ksp_R */ 1513 /* PCBDDCSetUpLocalScatters should be called first! */ 1514 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1515 1516 /* 1517 Setup local correction and local part of coarse basis. 1518 Gives back the dense local part of the coarse matrix in column major ordering 1519 */ 1520 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1521 1522 /* Compute total number of coarse nodes and setup coarse solver */ 1523 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1524 1525 /* free */ 1526 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "PCBDDCResetCustomization" 1532 PetscErrorCode PCBDDCResetCustomization(PC pc) 1533 { 1534 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1539 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1540 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1541 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1542 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1543 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1544 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1545 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1546 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1547 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1548 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "PCBDDCResetTopography" 1554 PetscErrorCode PCBDDCResetTopography(PC pc) 1555 { 1556 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1557 PetscInt i; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1562 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1563 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1564 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1565 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1566 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1567 for (i=0;i<pcbddc->n_local_subs;i++) { 1568 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1569 } 1570 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1571 if (pcbddc->sub_schurs) { 1572 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1573 } 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "PCBDDCResetSolvers" 1579 PetscErrorCode PCBDDCResetSolvers(PC pc) 1580 { 1581 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1586 if (pcbddc->coarse_phi_B) { 1587 PetscScalar *array; 1588 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1589 ierr = PetscFree(array);CHKERRQ(ierr); 1590 } 1591 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1592 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1593 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1595 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1596 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1597 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1599 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1600 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1601 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1602 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1603 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1604 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1605 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1606 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1607 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1610 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1611 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1612 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1613 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1614 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1615 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1616 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1618 if (pcbddc->benign_zerodiag_subs) { 1619 PetscInt i; 1620 for (i=0;i<pcbddc->benign_n;i++) { 1621 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1622 } 1623 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1624 } 1625 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1631 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1632 { 1633 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1634 PC_IS *pcis = (PC_IS*)pc->data; 1635 VecType impVecType; 1636 PetscInt n_constraints,n_R,old_size; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1641 /* get sizes */ 1642 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1643 n_R = pcis->n - pcbddc->n_vertices; 1644 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1645 /* local work vectors (try to avoid unneeded work)*/ 1646 /* R nodes */ 1647 old_size = -1; 1648 if (pcbddc->vec1_R) { 1649 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1650 } 1651 if (n_R != old_size) { 1652 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1653 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1654 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1655 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1656 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1657 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 /* local primal dofs */ 1660 old_size = -1; 1661 if (pcbddc->vec1_P) { 1662 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1663 } 1664 if (pcbddc->local_primal_size != old_size) { 1665 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1666 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1667 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1668 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1669 } 1670 /* local explicit constraints */ 1671 old_size = -1; 1672 if (pcbddc->vec1_C) { 1673 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1674 } 1675 if (n_constraints && n_constraints != old_size) { 1676 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1677 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1678 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1679 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "PCBDDCSetUpCorrection" 1686 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1687 { 1688 PetscErrorCode ierr; 1689 /* pointers to pcis and pcbddc */ 1690 PC_IS* pcis = (PC_IS*)pc->data; 1691 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1692 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1693 /* submatrices of local problem */ 1694 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1695 /* submatrices of local coarse problem */ 1696 Mat S_VV,S_CV,S_VC,S_CC; 1697 /* working matrices */ 1698 Mat C_CR; 1699 /* additional working stuff */ 1700 PC pc_R; 1701 Mat F; 1702 Vec dummy_vec; 1703 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1704 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1705 PetscScalar *work; 1706 PetscInt *idx_V_B; 1707 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1708 PetscInt i,n_R,n_D,n_B; 1709 1710 /* some shortcuts to scalars */ 1711 PetscScalar one=1.0,m_one=-1.0; 1712 1713 PetscFunctionBegin; 1714 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1716 } 1717 1718 /* Set Non-overlapping dimensions */ 1719 n_vertices = pcbddc->n_vertices; 1720 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1721 n_B = pcis->n_B; 1722 n_D = pcis->n - n_B; 1723 n_R = pcis->n - n_vertices; 1724 1725 /* vertices in boundary numbering */ 1726 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1727 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1728 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1729 1730 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1731 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1732 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1733 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1735 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1737 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1739 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1740 1741 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1742 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1743 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1744 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1745 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1746 lda_rhs = n_R; 1747 need_benign_correction = PETSC_FALSE; 1748 if (isLU || isILU || isCHOL) { 1749 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1750 } else if (sub_schurs && sub_schurs->reuse_solver) { 1751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1752 MatFactorType type; 1753 1754 F = reuse_solver->F; 1755 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1756 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1757 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1758 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1759 } else { 1760 F = NULL; 1761 } 1762 1763 /* allocate workspace */ 1764 n = 0; 1765 if (n_constraints) { 1766 n += lda_rhs*n_constraints; 1767 } 1768 if (n_vertices) { 1769 n = PetscMax(2*lda_rhs*n_vertices,n); 1770 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1771 } 1772 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1773 1774 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1775 dummy_vec = NULL; 1776 if (need_benign_correction && lda_rhs != n_R && F) { 1777 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1778 } 1779 1780 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1781 if (n_constraints) { 1782 Mat M1,M2,M3,C_B; 1783 IS is_aux; 1784 PetscScalar *array,*array2; 1785 1786 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1787 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1788 1789 /* Extract constraints on R nodes: C_{CR} */ 1790 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1791 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1792 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1793 1794 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1795 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1796 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1797 for (i=0;i<n_constraints;i++) { 1798 const PetscScalar *row_cmat_values; 1799 const PetscInt *row_cmat_indices; 1800 PetscInt size_of_constraint,j; 1801 1802 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1803 for (j=0;j<size_of_constraint;j++) { 1804 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1805 } 1806 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1807 } 1808 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1809 if (F) { 1810 Mat B; 1811 1812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1813 if (need_benign_correction) { 1814 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1815 1816 /* rhs is already zero on interior dofs, no need to change the rhs */ 1817 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1818 } 1819 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1820 if (need_benign_correction) { 1821 PetscScalar *marr; 1822 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1823 1824 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1825 if (lda_rhs != n_R) { 1826 for (i=0;i<n_constraints;i++) { 1827 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1828 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1829 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1830 } 1831 } else { 1832 for (i=0;i<n_constraints;i++) { 1833 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1834 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1835 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1836 } 1837 } 1838 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1839 } 1840 ierr = MatDestroy(&B);CHKERRQ(ierr); 1841 } else { 1842 PetscScalar *marr; 1843 1844 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1845 for (i=0;i<n_constraints;i++) { 1846 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1847 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1848 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1849 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1850 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1851 } 1852 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1853 } 1854 if (!pcbddc->switch_static) { 1855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1856 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1857 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1858 for (i=0;i<n_constraints;i++) { 1859 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1860 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1864 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1865 } 1866 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1867 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1868 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1869 } else { 1870 if (lda_rhs != n_R) { 1871 IS dummy; 1872 1873 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1874 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1875 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1876 } else { 1877 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1878 pcbddc->local_auxmat2 = local_auxmat2_R; 1879 } 1880 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1881 } 1882 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1883 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1884 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1885 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1886 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1887 if (isCHOL) { 1888 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1891 } 1892 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1893 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1894 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1895 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1896 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1897 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1898 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1900 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1901 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1902 } 1903 1904 /* Get submatrices from subdomain matrix */ 1905 if (n_vertices) { 1906 IS is_aux; 1907 1908 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1909 IS tis; 1910 1911 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1912 ierr = ISSort(tis);CHKERRQ(ierr); 1913 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1914 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1915 } else { 1916 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1917 } 1918 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1919 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1920 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1921 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1922 } 1923 1924 /* Matrix of coarse basis functions (local) */ 1925 if (pcbddc->coarse_phi_B) { 1926 PetscInt on_B,on_primal,on_D=n_D; 1927 if (pcbddc->coarse_phi_D) { 1928 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1929 } 1930 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1931 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1932 PetscScalar *marray; 1933 1934 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1935 ierr = PetscFree(marray);CHKERRQ(ierr); 1936 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1937 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1938 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1939 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1940 } 1941 } 1942 1943 if (!pcbddc->coarse_phi_B) { 1944 PetscScalar *marray; 1945 1946 n = n_B*pcbddc->local_primal_size; 1947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1948 n += n_D*pcbddc->local_primal_size; 1949 } 1950 if (!pcbddc->symmetric_primal) { 1951 n *= 2; 1952 } 1953 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1955 n = n_B*pcbddc->local_primal_size; 1956 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1958 n += n_D*pcbddc->local_primal_size; 1959 } 1960 if (!pcbddc->symmetric_primal) { 1961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1962 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1963 n = n_B*pcbddc->local_primal_size; 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1965 } 1966 } else { 1967 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1968 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1969 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1970 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1971 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1972 } 1973 } 1974 } 1975 1976 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1977 p0_lidx_I = NULL; 1978 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1979 const PetscInt *idxs; 1980 1981 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1982 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1983 for (i=0;i<pcbddc->benign_n;i++) { 1984 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1985 } 1986 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1987 } 1988 1989 /* vertices */ 1990 if (n_vertices) { 1991 1992 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1993 1994 if (n_R) { 1995 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1996 PetscBLASInt B_N,B_one = 1; 1997 PetscScalar *x,*y; 1998 PetscBool isseqaij; 1999 2000 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2001 if (need_benign_correction) { 2002 ISLocalToGlobalMapping RtoN; 2003 IS is_p0; 2004 PetscInt *idxs_p0,n; 2005 2006 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2007 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2008 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2009 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2010 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2012 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2013 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2014 } 2015 2016 if (lda_rhs == n_R) { 2017 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2018 } else { 2019 PetscScalar *av,*array; 2020 const PetscInt *xadj,*adjncy; 2021 PetscInt n; 2022 PetscBool flg_row; 2023 2024 array = work+lda_rhs*n_vertices; 2025 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2026 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2027 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2028 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2029 for (i=0;i<n;i++) { 2030 PetscInt j; 2031 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2032 } 2033 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2034 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2035 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2036 } 2037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2038 if (need_benign_correction) { 2039 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2040 PetscScalar *marr; 2041 2042 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2043 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2044 2045 | 0 0 0 | (V) 2046 L = | 0 0 -1 | (P-p0) 2047 | 0 0 -1 | (p0) 2048 2049 */ 2050 for (i=0;i<reuse_solver->benign_n;i++) { 2051 const PetscScalar *vals; 2052 const PetscInt *idxs,*idxs_zero; 2053 PetscInt n,j,nz; 2054 2055 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2056 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2057 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2058 for (j=0;j<n;j++) { 2059 PetscScalar val = vals[j]; 2060 PetscInt k,col = idxs[j]; 2061 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2062 } 2063 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2064 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2065 } 2066 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2067 } 2068 if (F) { 2069 /* need to correct the rhs */ 2070 if (need_benign_correction) { 2071 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2072 PetscScalar *marr; 2073 2074 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2075 if (lda_rhs != n_R) { 2076 for (i=0;i<n_vertices;i++) { 2077 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2078 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2079 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2080 } 2081 } else { 2082 for (i=0;i<n_vertices;i++) { 2083 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2084 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2089 } 2090 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2091 /* need to correct the solution */ 2092 if (need_benign_correction) { 2093 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2094 PetscScalar *marr; 2095 2096 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2097 if (lda_rhs != n_R) { 2098 for (i=0;i<n_vertices;i++) { 2099 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2100 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2101 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2102 } 2103 } else { 2104 for (i=0;i<n_vertices;i++) { 2105 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2106 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2107 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2108 } 2109 } 2110 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2111 } 2112 } else { 2113 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2114 for (i=0;i<n_vertices;i++) { 2115 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2116 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2117 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2118 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2119 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2120 } 2121 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2122 } 2123 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2124 /* S_VV and S_CV */ 2125 if (n_constraints) { 2126 Mat B; 2127 2128 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2129 for (i=0;i<n_vertices;i++) { 2130 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2131 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2132 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2134 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2136 } 2137 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2138 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2139 ierr = MatDestroy(&B);CHKERRQ(ierr); 2140 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2141 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2142 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2144 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2145 ierr = MatDestroy(&B);CHKERRQ(ierr); 2146 } 2147 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2148 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2149 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2150 } 2151 if (lda_rhs != n_R) { 2152 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2154 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2155 } 2156 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2157 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2158 if (need_benign_correction) { 2159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2160 PetscScalar *marr,*sums; 2161 2162 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2163 ierr = MatDenseGetArray(S_VVt,&marr); 2164 for (i=0;i<reuse_solver->benign_n;i++) { 2165 const PetscScalar *vals; 2166 const PetscInt *idxs,*idxs_zero; 2167 PetscInt n,j,nz; 2168 2169 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2170 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2171 for (j=0;j<n_vertices;j++) { 2172 PetscInt k; 2173 sums[j] = 0.; 2174 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2175 } 2176 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2177 for (j=0;j<n;j++) { 2178 PetscScalar val = vals[j]; 2179 PetscInt k; 2180 for (k=0;k<n_vertices;k++) { 2181 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2182 } 2183 } 2184 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2185 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(sums);CHKERRQ(ierr); 2188 ierr = MatDenseRestoreArray(S_VVt,&marr); 2189 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2190 } 2191 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2193 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2195 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2196 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2197 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2198 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2199 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2200 } else { 2201 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2202 } 2203 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2204 2205 /* coarse basis functions */ 2206 for (i=0;i<n_vertices;i++) { 2207 PetscScalar *y; 2208 2209 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2210 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2211 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2212 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2213 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2214 y[n_B*i+idx_V_B[i]] = 1.0; 2215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2216 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2217 2218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2219 PetscInt j; 2220 2221 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2222 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2223 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2224 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2225 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2226 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2227 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2228 } 2229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2230 } 2231 /* if n_R == 0 the object is not destroyed */ 2232 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2233 } 2234 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2235 2236 if (n_constraints) { 2237 Mat B; 2238 2239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2240 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2241 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2242 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2243 if (n_vertices) { 2244 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2245 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2246 } else { 2247 Mat S_VCt; 2248 2249 if (lda_rhs != n_R) { 2250 ierr = MatDestroy(&B);CHKERRQ(ierr); 2251 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2252 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2253 } 2254 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2255 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2256 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2257 } 2258 } 2259 ierr = MatDestroy(&B);CHKERRQ(ierr); 2260 /* coarse basis functions */ 2261 for (i=0;i<n_constraints;i++) { 2262 PetscScalar *y; 2263 2264 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2265 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2266 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2267 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2268 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2269 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2270 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2271 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2272 PetscInt j; 2273 2274 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2275 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2279 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2280 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2281 } 2282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2283 } 2284 } 2285 if (n_constraints) { 2286 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2287 } 2288 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2289 2290 /* coarse matrix entries relative to B_0 */ 2291 if (pcbddc->benign_n) { 2292 Mat B0_B,B0_BPHI; 2293 IS is_dummy; 2294 PetscScalar *data; 2295 PetscInt j; 2296 2297 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2299 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2300 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2301 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2303 for (j=0;j<pcbddc->benign_n;j++) { 2304 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2305 for (i=0;i<pcbddc->local_primal_size;i++) { 2306 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2307 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2308 } 2309 } 2310 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2311 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2313 } 2314 2315 /* compute other basis functions for non-symmetric problems */ 2316 if (!pcbddc->symmetric_primal) { 2317 Mat B_V=NULL,B_C=NULL; 2318 PetscScalar *marray; 2319 2320 if (n_constraints) { 2321 Mat S_CCT,C_CRT; 2322 2323 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2324 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2325 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2326 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2327 if (n_vertices) { 2328 Mat S_VCT; 2329 2330 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2331 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2332 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2333 } 2334 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2335 } 2336 if (n_vertices && n_R) { 2337 PetscScalar *av,*marray; 2338 const PetscInt *xadj,*adjncy; 2339 PetscInt n; 2340 PetscBool flg_row; 2341 2342 /* B_V = B_V - A_VR^T */ 2343 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2344 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2345 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2346 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2347 for (i=0;i<n;i++) { 2348 PetscInt j; 2349 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2350 } 2351 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2352 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2353 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2354 } 2355 2356 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2357 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2358 for (i=0;i<n_vertices;i++) { 2359 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2360 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2361 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2362 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2363 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2364 } 2365 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2366 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2367 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2368 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2369 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2370 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2371 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2372 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2373 } 2374 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2375 /* coarse basis functions */ 2376 for (i=0;i<pcbddc->local_primal_size;i++) { 2377 PetscScalar *y; 2378 2379 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2380 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2381 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2382 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2383 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2384 if (i<n_vertices) { 2385 y[n_B*i+idx_V_B[i]] = 1.0; 2386 } 2387 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2388 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2389 2390 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2391 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2392 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2393 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2394 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2395 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2396 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2397 } 2398 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2399 } 2400 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2401 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2402 } 2403 /* free memory */ 2404 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2405 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2406 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2407 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2408 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2409 ierr = PetscFree(work);CHKERRQ(ierr); 2410 if (n_vertices) { 2411 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2412 } 2413 if (n_constraints) { 2414 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2415 } 2416 /* Checking coarse_sub_mat and coarse basis functios */ 2417 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2418 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2419 if (pcbddc->dbg_flag) { 2420 Mat coarse_sub_mat; 2421 Mat AUXMAT,TM1,TM2,TM3,TM4; 2422 Mat coarse_phi_D,coarse_phi_B; 2423 Mat coarse_psi_D,coarse_psi_B; 2424 Mat A_II,A_BB,A_IB,A_BI; 2425 Mat C_B,CPHI; 2426 IS is_dummy; 2427 Vec mones; 2428 MatType checkmattype=MATSEQAIJ; 2429 PetscReal real_value; 2430 2431 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2432 Mat A; 2433 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2434 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2435 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2436 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2437 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2438 ierr = MatDestroy(&A);CHKERRQ(ierr); 2439 } else { 2440 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2441 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2442 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2443 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2444 } 2445 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2446 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2447 if (!pcbddc->symmetric_primal) { 2448 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2449 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2450 } 2451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2452 2453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2456 if (!pcbddc->symmetric_primal) { 2457 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2458 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2460 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2461 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2463 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2464 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2465 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2466 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2467 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2468 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2469 } else { 2470 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2471 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2472 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2473 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2475 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2476 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2477 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2478 } 2479 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2480 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2481 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2482 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2483 if (pcbddc->benign_n) { 2484 Mat B0_B,B0_BPHI; 2485 PetscScalar *data,*data2; 2486 PetscInt j; 2487 2488 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2489 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2490 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2491 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2492 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2493 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2494 for (j=0;j<pcbddc->benign_n;j++) { 2495 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2496 for (i=0;i<pcbddc->local_primal_size;i++) { 2497 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2498 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2499 } 2500 } 2501 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2502 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2503 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2504 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2505 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2506 } 2507 #if 0 2508 { 2509 PetscViewer viewer; 2510 char filename[256]; 2511 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2512 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2513 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2514 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2515 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2516 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2517 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2518 if (save_change) { 2519 Mat phi_B; 2520 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2521 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2522 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2523 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2524 } else { 2525 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2526 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2527 } 2528 if (pcbddc->coarse_phi_D) { 2529 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2530 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2531 } 2532 if (pcbddc->coarse_psi_B) { 2533 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2534 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2535 } 2536 if (pcbddc->coarse_psi_D) { 2537 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2538 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2539 } 2540 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2541 } 2542 #endif 2543 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2544 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2545 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2547 2548 /* check constraints */ 2549 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2551 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2552 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2553 } else { 2554 PetscScalar *data; 2555 Mat tmat; 2556 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2558 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2559 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2560 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2561 } 2562 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2563 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2564 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2565 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2567 if (!pcbddc->symmetric_primal) { 2568 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2569 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2570 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2571 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2573 } 2574 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2575 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2576 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2577 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2579 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2580 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2581 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2582 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2583 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2584 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2585 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2586 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2587 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2588 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2589 if (!pcbddc->symmetric_primal) { 2590 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2591 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2592 } 2593 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2594 } 2595 /* get back data */ 2596 *coarse_submat_vals_n = coarse_submat_vals; 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2602 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2603 { 2604 Mat *work_mat; 2605 IS isrow_s,iscol_s; 2606 PetscBool rsorted,csorted; 2607 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2608 PetscErrorCode ierr; 2609 2610 PetscFunctionBegin; 2611 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2612 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2613 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2615 2616 if (!rsorted) { 2617 const PetscInt *idxs; 2618 PetscInt *idxs_sorted,i; 2619 2620 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2621 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2622 for (i=0;i<rsize;i++) { 2623 idxs_perm_r[i] = i; 2624 } 2625 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2626 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2627 for (i=0;i<rsize;i++) { 2628 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2629 } 2630 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2632 } else { 2633 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2634 isrow_s = isrow; 2635 } 2636 2637 if (!csorted) { 2638 if (isrow == iscol) { 2639 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2640 iscol_s = isrow_s; 2641 } else { 2642 const PetscInt *idxs; 2643 PetscInt *idxs_sorted,i; 2644 2645 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2646 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2647 for (i=0;i<csize;i++) { 2648 idxs_perm_c[i] = i; 2649 } 2650 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2651 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2652 for (i=0;i<csize;i++) { 2653 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2654 } 2655 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2656 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2657 } 2658 } else { 2659 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2660 iscol_s = iscol; 2661 } 2662 2663 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2664 2665 if (!rsorted || !csorted) { 2666 Mat new_mat; 2667 IS is_perm_r,is_perm_c; 2668 2669 if (!rsorted) { 2670 PetscInt *idxs_r,i; 2671 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2672 for (i=0;i<rsize;i++) { 2673 idxs_r[idxs_perm_r[i]] = i; 2674 } 2675 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2676 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2677 } else { 2678 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2679 } 2680 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2681 2682 if (!csorted) { 2683 if (isrow_s == iscol_s) { 2684 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2685 is_perm_c = is_perm_r; 2686 } else { 2687 PetscInt *idxs_c,i; 2688 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2689 for (i=0;i<csize;i++) { 2690 idxs_c[idxs_perm_c[i]] = i; 2691 } 2692 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2693 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2694 } 2695 } else { 2696 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2697 } 2698 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2699 2700 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2701 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2702 work_mat[0] = new_mat; 2703 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2704 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2705 } 2706 2707 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2708 *B = work_mat[0]; 2709 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2710 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2711 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2717 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2718 { 2719 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2720 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2721 Mat new_mat; 2722 IS is_local,is_global; 2723 PetscInt local_size; 2724 PetscBool isseqaij; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2729 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2730 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2731 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2732 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2733 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2734 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2735 2736 /* check */ 2737 if (pcbddc->dbg_flag) { 2738 Vec x,x_change; 2739 PetscReal error; 2740 2741 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2742 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2743 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2744 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2746 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2747 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2748 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2749 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2750 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2753 ierr = VecDestroy(&x);CHKERRQ(ierr); 2754 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2755 } 2756 2757 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2758 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2759 if (isseqaij) { 2760 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2761 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2762 } else { 2763 Mat work_mat; 2764 2765 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2766 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2767 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2768 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2769 } 2770 if (matis->A->symmetric_set) { 2771 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2772 #if !defined(PETSC_USE_COMPLEX) 2773 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2774 #endif 2775 } 2776 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2782 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2783 { 2784 PC_IS* pcis = (PC_IS*)(pc->data); 2785 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2786 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2787 PetscInt *idx_R_local=NULL; 2788 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2789 PetscInt vbs,bs; 2790 PetscBT bitmask=NULL; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 /* 2795 No need to setup local scatters if 2796 - primal space is unchanged 2797 AND 2798 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2799 AND 2800 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2801 */ 2802 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2803 PetscFunctionReturn(0); 2804 } 2805 /* destroy old objects */ 2806 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2807 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2808 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2809 /* Set Non-overlapping dimensions */ 2810 n_B = pcis->n_B; 2811 n_D = pcis->n - n_B; 2812 n_vertices = pcbddc->n_vertices; 2813 2814 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2815 2816 /* create auxiliary bitmask and allocate workspace */ 2817 if (!sub_schurs || !sub_schurs->reuse_solver) { 2818 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2819 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2820 for (i=0;i<n_vertices;i++) { 2821 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2822 } 2823 2824 for (i=0, n_R=0; i<pcis->n; i++) { 2825 if (!PetscBTLookup(bitmask,i)) { 2826 idx_R_local[n_R++] = i; 2827 } 2828 } 2829 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2830 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2831 2832 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2833 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2834 } 2835 2836 /* Block code */ 2837 vbs = 1; 2838 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2839 if (bs>1 && !(n_vertices%bs)) { 2840 PetscBool is_blocked = PETSC_TRUE; 2841 PetscInt *vary; 2842 if (!sub_schurs || !sub_schurs->reuse_solver) { 2843 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2844 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2845 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2846 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2847 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2848 for (i=0; i<pcis->n/bs; i++) { 2849 if (vary[i]!=0 && vary[i]!=bs) { 2850 is_blocked = PETSC_FALSE; 2851 break; 2852 } 2853 } 2854 ierr = PetscFree(vary);CHKERRQ(ierr); 2855 } else { 2856 /* Verify directly the R set */ 2857 for (i=0; i<n_R/bs; i++) { 2858 PetscInt j,node=idx_R_local[bs*i]; 2859 for (j=1; j<bs; j++) { 2860 if (node != idx_R_local[bs*i+j]-j) { 2861 is_blocked = PETSC_FALSE; 2862 break; 2863 } 2864 } 2865 } 2866 } 2867 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2868 vbs = bs; 2869 for (i=0;i<n_R/vbs;i++) { 2870 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2871 } 2872 } 2873 } 2874 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2875 if (sub_schurs && sub_schurs->reuse_solver) { 2876 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2877 2878 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2879 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2880 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2881 reuse_solver->is_R = pcbddc->is_R_local; 2882 } else { 2883 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2884 } 2885 2886 /* print some info if requested */ 2887 if (pcbddc->dbg_flag) { 2888 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2890 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2895 } 2896 2897 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2898 if (!sub_schurs || !sub_schurs->reuse_solver) { 2899 IS is_aux1,is_aux2; 2900 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2901 2902 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2903 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2905 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2906 for (i=0; i<n_D; i++) { 2907 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2908 } 2909 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2910 for (i=0, j=0; i<n_R; i++) { 2911 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2912 aux_array1[j++] = i; 2913 } 2914 } 2915 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2916 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2917 for (i=0, j=0; i<n_B; i++) { 2918 if (!PetscBTLookup(bitmask,is_indices[i])) { 2919 aux_array2[j++] = i; 2920 } 2921 } 2922 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2923 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2924 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2925 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2926 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2927 2928 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2929 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2930 for (i=0, j=0; i<n_R; i++) { 2931 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2932 aux_array1[j++] = i; 2933 } 2934 } 2935 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2936 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2937 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2938 } 2939 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2940 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2941 } else { 2942 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2943 IS tis; 2944 PetscInt schur_size; 2945 2946 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2947 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2948 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2949 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2951 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2952 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2953 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2954 } 2955 } 2956 PetscFunctionReturn(0); 2957 } 2958 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2962 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2963 { 2964 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2965 PC_IS *pcis = (PC_IS*)pc->data; 2966 PC pc_temp; 2967 Mat A_RR; 2968 MatReuse reuse; 2969 PetscScalar m_one = -1.0; 2970 PetscReal value; 2971 PetscInt n_D,n_R; 2972 PetscBool check_corr[2],issbaij; 2973 PetscErrorCode ierr; 2974 /* prefixes stuff */ 2975 char dir_prefix[256],neu_prefix[256],str_level[16]; 2976 size_t len; 2977 2978 PetscFunctionBegin; 2979 2980 /* compute prefixes */ 2981 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2982 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2983 if (!pcbddc->current_level) { 2984 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2985 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2986 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2987 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2988 } else { 2989 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2990 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2991 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2992 len -= 15; /* remove "pc_bddc_coarse_" */ 2993 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2994 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2995 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2996 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2997 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2998 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2999 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3000 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3001 } 3002 3003 /* DIRICHLET PROBLEM */ 3004 if (dirichlet) { 3005 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3006 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3007 if (!sub_schurs || !sub_schurs->reuse_solver) { 3008 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3009 } 3010 if (pcbddc->dbg_flag) { 3011 Mat A_IIn; 3012 3013 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3014 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3015 pcis->A_II = A_IIn; 3016 } 3017 } 3018 if (pcbddc->local_mat->symmetric_set) { 3019 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3020 } 3021 /* Matrix for Dirichlet problem is pcis->A_II */ 3022 n_D = pcis->n - pcis->n_B; 3023 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3024 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3025 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3026 /* default */ 3027 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3028 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3029 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3030 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3031 if (issbaij) { 3032 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3033 } else { 3034 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3035 } 3036 /* Allow user's customization */ 3037 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3038 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3039 } 3040 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3041 if (sub_schurs && sub_schurs->reuse_solver) { 3042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3043 3044 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3045 } 3046 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3047 if (!n_D) { 3048 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3049 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3050 } 3051 /* Set Up KSP for Dirichlet problem of BDDC */ 3052 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3053 /* set ksp_D into pcis data */ 3054 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3055 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3056 pcis->ksp_D = pcbddc->ksp_D; 3057 } 3058 3059 /* NEUMANN PROBLEM */ 3060 A_RR = 0; 3061 if (neumann) { 3062 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3063 PetscInt ibs,mbs; 3064 PetscBool issbaij; 3065 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3066 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3067 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3068 if (pcbddc->ksp_R) { /* already created ksp */ 3069 PetscInt nn_R; 3070 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3071 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3072 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3073 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3074 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3075 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3076 reuse = MAT_INITIAL_MATRIX; 3077 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3078 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3079 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3080 reuse = MAT_INITIAL_MATRIX; 3081 } else { /* safe to reuse the matrix */ 3082 reuse = MAT_REUSE_MATRIX; 3083 } 3084 } 3085 /* last check */ 3086 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3088 reuse = MAT_INITIAL_MATRIX; 3089 } 3090 } else { /* first time, so we need to create the matrix */ 3091 reuse = MAT_INITIAL_MATRIX; 3092 } 3093 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3094 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3095 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3096 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3097 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3098 if (matis->A == pcbddc->local_mat) { 3099 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3100 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3101 } else { 3102 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3103 } 3104 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3105 if (matis->A == pcbddc->local_mat) { 3106 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3107 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3108 } else { 3109 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3110 } 3111 } 3112 /* extract A_RR */ 3113 if (sub_schurs && sub_schurs->reuse_solver) { 3114 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3115 3116 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3117 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3118 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3119 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3120 } else { 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3122 } 3123 } else { 3124 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3125 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3126 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3127 } 3128 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3129 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3130 } 3131 if (pcbddc->local_mat->symmetric_set) { 3132 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3133 } 3134 if (!pcbddc->ksp_R) { /* create object if not present */ 3135 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3137 /* default */ 3138 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3139 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3140 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3141 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3142 if (issbaij) { 3143 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3144 } else { 3145 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3146 } 3147 /* Allow user's customization */ 3148 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3149 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3150 } 3151 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3152 if (!n_R) { 3153 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3154 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3155 } 3156 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3157 /* Reuse solver if it is present */ 3158 if (sub_schurs && sub_schurs->reuse_solver) { 3159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3160 3161 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3162 } 3163 /* Set Up KSP for Neumann problem of BDDC */ 3164 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3165 } 3166 3167 if (pcbddc->dbg_flag) { 3168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3169 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3171 } 3172 3173 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3174 check_corr[0] = check_corr[1] = PETSC_FALSE; 3175 if (pcbddc->NullSpace_corr[0]) { 3176 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3177 } 3178 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3179 check_corr[0] = PETSC_TRUE; 3180 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3181 } 3182 if (neumann && pcbddc->NullSpace_corr[2]) { 3183 check_corr[1] = PETSC_TRUE; 3184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3185 } 3186 3187 /* check Dirichlet and Neumann solvers */ 3188 if (pcbddc->dbg_flag) { 3189 if (dirichlet) { /* Dirichlet */ 3190 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3191 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3192 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3193 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3194 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3196 if (check_corr[0]) { 3197 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3198 } 3199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3200 } 3201 if (neumann) { /* Neumann */ 3202 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3203 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3204 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3205 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3206 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3208 if (check_corr[1]) { 3209 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3210 } 3211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3212 } 3213 } 3214 /* free Neumann problem's matrix */ 3215 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3221 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3222 { 3223 PetscErrorCode ierr; 3224 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3225 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3226 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3227 3228 PetscFunctionBegin; 3229 if (!reuse_solver) { 3230 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3231 } 3232 if (!pcbddc->switch_static) { 3233 if (applytranspose && pcbddc->local_auxmat1) { 3234 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3235 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3236 } 3237 if (!reuse_solver) { 3238 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3240 } else { 3241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3242 3243 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3245 } 3246 } else { 3247 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3248 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3249 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3251 if (applytranspose && pcbddc->local_auxmat1) { 3252 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3253 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3254 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3255 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3256 } 3257 } 3258 if (!reuse_solver || pcbddc->switch_static) { 3259 if (applytranspose) { 3260 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3261 } else { 3262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3263 } 3264 } else { 3265 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3266 3267 if (applytranspose) { 3268 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3269 } else { 3270 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3271 } 3272 } 3273 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3274 if (!pcbddc->switch_static) { 3275 if (!reuse_solver) { 3276 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3277 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3278 } else { 3279 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3280 3281 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3282 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3283 } 3284 if (!applytranspose && pcbddc->local_auxmat1) { 3285 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3286 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3287 } 3288 } else { 3289 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3290 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3291 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3292 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 if (!applytranspose && pcbddc->local_auxmat1) { 3294 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3295 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3296 } 3297 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3298 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3299 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3300 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3301 } 3302 PetscFunctionReturn(0); 3303 } 3304 3305 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3308 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3309 { 3310 PetscErrorCode ierr; 3311 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3312 PC_IS* pcis = (PC_IS*) (pc->data); 3313 const PetscScalar zero = 0.0; 3314 3315 PetscFunctionBegin; 3316 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3317 if (!pcbddc->benign_apply_coarse_only) { 3318 if (applytranspose) { 3319 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3320 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3321 } else { 3322 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3323 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3324 } 3325 } else { 3326 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3327 } 3328 3329 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3330 if (pcbddc->benign_n) { 3331 PetscScalar *array; 3332 PetscInt j; 3333 3334 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3335 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3336 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3337 } 3338 3339 /* start communications from local primal nodes to rhs of coarse solver */ 3340 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3341 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3343 3344 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3345 if (pcbddc->coarse_ksp) { 3346 Mat coarse_mat; 3347 Vec rhs,sol; 3348 MatNullSpace nullsp; 3349 PetscBool isbddc = PETSC_FALSE; 3350 3351 if (pcbddc->benign_have_null) { 3352 PC coarse_pc; 3353 3354 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3355 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3356 /* we need to propagate to coarser levels the need for a possible benign correction */ 3357 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3358 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3359 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3360 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3361 } 3362 } 3363 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3364 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3365 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3366 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3367 if (nullsp) { 3368 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3369 } 3370 if (applytranspose) { 3371 if (pcbddc->benign_apply_coarse_only) { 3372 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3373 } else { 3374 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3375 } 3376 } else { 3377 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3378 PC coarse_pc; 3379 3380 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3381 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3382 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3383 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3384 } else { 3385 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3386 } 3387 } 3388 /* we don't need the benign correction at coarser levels anymore */ 3389 if (pcbddc->benign_have_null && isbddc) { 3390 PC coarse_pc; 3391 PC_BDDC* coarsepcbddc; 3392 3393 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3394 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3395 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3396 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3397 } 3398 if (nullsp) { 3399 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3400 } 3401 } 3402 3403 /* Local solution on R nodes */ 3404 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3405 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3406 } 3407 /* communications from coarse sol to local primal nodes */ 3408 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3409 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3410 3411 /* Sum contributions from the two levels */ 3412 if (!pcbddc->benign_apply_coarse_only) { 3413 if (applytranspose) { 3414 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3415 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3416 } else { 3417 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3418 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3419 } 3420 /* store p0 */ 3421 if (pcbddc->benign_n) { 3422 PetscScalar *array; 3423 PetscInt j; 3424 3425 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3426 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3427 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3428 } 3429 } else { /* expand the coarse solution */ 3430 if (applytranspose) { 3431 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3432 } else { 3433 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3434 } 3435 } 3436 PetscFunctionReturn(0); 3437 } 3438 3439 #undef __FUNCT__ 3440 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3441 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3442 { 3443 PetscErrorCode ierr; 3444 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3445 PetscScalar *array; 3446 Vec from,to; 3447 3448 PetscFunctionBegin; 3449 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3450 from = pcbddc->coarse_vec; 3451 to = pcbddc->vec1_P; 3452 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3453 Vec tvec; 3454 3455 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3456 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3457 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3458 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3459 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3460 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3461 } 3462 } else { /* from local to global -> put data in coarse right hand side */ 3463 from = pcbddc->vec1_P; 3464 to = pcbddc->coarse_vec; 3465 } 3466 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3467 PetscFunctionReturn(0); 3468 } 3469 3470 #undef __FUNCT__ 3471 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3472 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3473 { 3474 PetscErrorCode ierr; 3475 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3476 PetscScalar *array; 3477 Vec from,to; 3478 3479 PetscFunctionBegin; 3480 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3481 from = pcbddc->coarse_vec; 3482 to = pcbddc->vec1_P; 3483 } else { /* from local to global -> put data in coarse right hand side */ 3484 from = pcbddc->vec1_P; 3485 to = pcbddc->coarse_vec; 3486 } 3487 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3488 if (smode == SCATTER_FORWARD) { 3489 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3490 Vec tvec; 3491 3492 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3493 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3494 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3495 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3496 } 3497 } else { 3498 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3499 ierr = VecResetArray(from);CHKERRQ(ierr); 3500 } 3501 } 3502 PetscFunctionReturn(0); 3503 } 3504 3505 /* uncomment for testing purposes */ 3506 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3509 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3510 { 3511 PetscErrorCode ierr; 3512 PC_IS* pcis = (PC_IS*)(pc->data); 3513 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3514 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3515 /* one and zero */ 3516 PetscScalar one=1.0,zero=0.0; 3517 /* space to store constraints and their local indices */ 3518 PetscScalar *constraints_data; 3519 PetscInt *constraints_idxs,*constraints_idxs_B; 3520 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3521 PetscInt *constraints_n; 3522 /* iterators */ 3523 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3524 /* BLAS integers */ 3525 PetscBLASInt lwork,lierr; 3526 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3527 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3528 /* reuse */ 3529 PetscInt olocal_primal_size,olocal_primal_size_cc; 3530 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3531 /* change of basis */ 3532 PetscBool qr_needed; 3533 PetscBT change_basis,qr_needed_idx; 3534 /* auxiliary stuff */ 3535 PetscInt *nnz,*is_indices; 3536 PetscInt ncc; 3537 /* some quantities */ 3538 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3539 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3540 3541 PetscFunctionBegin; 3542 /* Destroy Mat objects computed previously */ 3543 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3544 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3545 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3546 /* save info on constraints from previous setup (if any) */ 3547 olocal_primal_size = pcbddc->local_primal_size; 3548 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3549 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3550 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3551 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3552 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3553 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3554 3555 if (!pcbddc->adaptive_selection) { 3556 IS ISForVertices,*ISForFaces,*ISForEdges; 3557 MatNullSpace nearnullsp; 3558 const Vec *nearnullvecs; 3559 Vec *localnearnullsp; 3560 PetscScalar *array; 3561 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3562 PetscBool nnsp_has_cnst; 3563 /* LAPACK working arrays for SVD or POD */ 3564 PetscBool skip_lapack,boolforchange; 3565 PetscScalar *work; 3566 PetscReal *singular_vals; 3567 #if defined(PETSC_USE_COMPLEX) 3568 PetscReal *rwork; 3569 #endif 3570 #if defined(PETSC_MISSING_LAPACK_GESVD) 3571 PetscScalar *temp_basis,*correlation_mat; 3572 #else 3573 PetscBLASInt dummy_int=1; 3574 PetscScalar dummy_scalar=1.; 3575 #endif 3576 3577 /* Get index sets for faces, edges and vertices from graph */ 3578 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3579 /* print some info */ 3580 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3581 PetscInt nv; 3582 3583 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3584 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3585 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3590 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3591 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3592 } 3593 3594 /* free unneeded index sets */ 3595 if (!pcbddc->use_vertices) { 3596 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3597 } 3598 if (!pcbddc->use_edges) { 3599 for (i=0;i<n_ISForEdges;i++) { 3600 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3601 } 3602 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3603 n_ISForEdges = 0; 3604 } 3605 if (!pcbddc->use_faces) { 3606 for (i=0;i<n_ISForFaces;i++) { 3607 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3608 } 3609 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3610 n_ISForFaces = 0; 3611 } 3612 3613 /* check if near null space is attached to global mat */ 3614 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3615 if (nearnullsp) { 3616 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3617 /* remove any stored info */ 3618 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3619 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3620 /* store information for BDDC solver reuse */ 3621 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3622 pcbddc->onearnullspace = nearnullsp; 3623 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3624 for (i=0;i<nnsp_size;i++) { 3625 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3626 } 3627 } else { /* if near null space is not provided BDDC uses constants by default */ 3628 nnsp_size = 0; 3629 nnsp_has_cnst = PETSC_TRUE; 3630 } 3631 /* get max number of constraints on a single cc */ 3632 max_constraints = nnsp_size; 3633 if (nnsp_has_cnst) max_constraints++; 3634 3635 /* 3636 Evaluate maximum storage size needed by the procedure 3637 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3638 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3639 There can be multiple constraints per connected component 3640 */ 3641 n_vertices = 0; 3642 if (ISForVertices) { 3643 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3644 } 3645 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3646 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3647 3648 total_counts = n_ISForFaces+n_ISForEdges; 3649 total_counts *= max_constraints; 3650 total_counts += n_vertices; 3651 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3652 3653 total_counts = 0; 3654 max_size_of_constraint = 0; 3655 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3656 IS used_is; 3657 if (i<n_ISForEdges) { 3658 used_is = ISForEdges[i]; 3659 } else { 3660 used_is = ISForFaces[i-n_ISForEdges]; 3661 } 3662 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3663 total_counts += j; 3664 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3665 } 3666 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3667 3668 /* get local part of global near null space vectors */ 3669 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3670 for (k=0;k<nnsp_size;k++) { 3671 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3672 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3673 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3674 } 3675 3676 /* whether or not to skip lapack calls */ 3677 skip_lapack = PETSC_TRUE; 3678 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3679 3680 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3681 if (!skip_lapack) { 3682 PetscScalar temp_work; 3683 3684 #if defined(PETSC_MISSING_LAPACK_GESVD) 3685 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3686 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3687 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3688 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3689 #if defined(PETSC_USE_COMPLEX) 3690 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3691 #endif 3692 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3693 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3694 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3695 lwork = -1; 3696 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3697 #if !defined(PETSC_USE_COMPLEX) 3698 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3699 #else 3700 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3701 #endif 3702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3703 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3704 #else /* on missing GESVD */ 3705 /* SVD */ 3706 PetscInt max_n,min_n; 3707 max_n = max_size_of_constraint; 3708 min_n = max_constraints; 3709 if (max_size_of_constraint < max_constraints) { 3710 min_n = max_size_of_constraint; 3711 max_n = max_constraints; 3712 } 3713 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3714 #if defined(PETSC_USE_COMPLEX) 3715 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3716 #endif 3717 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3718 lwork = -1; 3719 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3720 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3721 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3722 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3723 #if !defined(PETSC_USE_COMPLEX) 3724 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3725 #else 3726 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3727 #endif 3728 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3730 #endif /* on missing GESVD */ 3731 /* Allocate optimal workspace */ 3732 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3733 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3734 } 3735 /* Now we can loop on constraining sets */ 3736 total_counts = 0; 3737 constraints_idxs_ptr[0] = 0; 3738 constraints_data_ptr[0] = 0; 3739 /* vertices */ 3740 if (n_vertices) { 3741 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3742 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3743 for (i=0;i<n_vertices;i++) { 3744 constraints_n[total_counts] = 1; 3745 constraints_data[total_counts] = 1.0; 3746 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3747 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3748 total_counts++; 3749 } 3750 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3751 n_vertices = total_counts; 3752 } 3753 3754 /* edges and faces */ 3755 total_counts_cc = total_counts; 3756 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3757 IS used_is; 3758 PetscBool idxs_copied = PETSC_FALSE; 3759 3760 if (ncc<n_ISForEdges) { 3761 used_is = ISForEdges[ncc]; 3762 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3763 } else { 3764 used_is = ISForFaces[ncc-n_ISForEdges]; 3765 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3766 } 3767 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3768 3769 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3770 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3771 /* change of basis should not be performed on local periodic nodes */ 3772 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3773 if (nnsp_has_cnst) { 3774 PetscScalar quad_value; 3775 3776 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3777 idxs_copied = PETSC_TRUE; 3778 3779 if (!pcbddc->use_nnsp_true) { 3780 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3781 } else { 3782 quad_value = 1.0; 3783 } 3784 for (j=0;j<size_of_constraint;j++) { 3785 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3786 } 3787 temp_constraints++; 3788 total_counts++; 3789 } 3790 for (k=0;k<nnsp_size;k++) { 3791 PetscReal real_value; 3792 PetscScalar *ptr_to_data; 3793 3794 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3795 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3796 for (j=0;j<size_of_constraint;j++) { 3797 ptr_to_data[j] = array[is_indices[j]]; 3798 } 3799 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3800 /* check if array is null on the connected component */ 3801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3802 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3803 if (real_value > 0.0) { /* keep indices and values */ 3804 temp_constraints++; 3805 total_counts++; 3806 if (!idxs_copied) { 3807 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3808 idxs_copied = PETSC_TRUE; 3809 } 3810 } 3811 } 3812 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3813 valid_constraints = temp_constraints; 3814 if (!pcbddc->use_nnsp_true && temp_constraints) { 3815 if (temp_constraints == 1) { /* just normalize the constraint */ 3816 PetscScalar norm,*ptr_to_data; 3817 3818 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3820 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3821 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3822 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3823 } else { /* perform SVD */ 3824 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3825 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3826 3827 #if defined(PETSC_MISSING_LAPACK_GESVD) 3828 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3829 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3830 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3831 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3832 from that computed using LAPACKgesvd 3833 -> This is due to a different computation of eigenvectors in LAPACKheev 3834 -> The quality of the POD-computed basis will be the same */ 3835 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3836 /* Store upper triangular part of correlation matrix */ 3837 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3838 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3839 for (j=0;j<temp_constraints;j++) { 3840 for (k=0;k<j+1;k++) { 3841 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3842 } 3843 } 3844 /* compute eigenvalues and eigenvectors of correlation matrix */ 3845 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3846 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3847 #if !defined(PETSC_USE_COMPLEX) 3848 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3849 #else 3850 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3851 #endif 3852 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3853 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3854 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3855 j = 0; 3856 while (j < temp_constraints && singular_vals[j] < tol) j++; 3857 total_counts = total_counts-j; 3858 valid_constraints = temp_constraints-j; 3859 /* scale and copy POD basis into used quadrature memory */ 3860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3861 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3862 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3864 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3866 if (j<temp_constraints) { 3867 PetscInt ii; 3868 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3869 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3870 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3871 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3872 for (k=0;k<temp_constraints-j;k++) { 3873 for (ii=0;ii<size_of_constraint;ii++) { 3874 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3875 } 3876 } 3877 } 3878 #else /* on missing GESVD */ 3879 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3880 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3881 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3882 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3883 #if !defined(PETSC_USE_COMPLEX) 3884 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3885 #else 3886 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3887 #endif 3888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3889 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3890 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3891 k = temp_constraints; 3892 if (k > size_of_constraint) k = size_of_constraint; 3893 j = 0; 3894 while (j < k && singular_vals[k-j-1] < tol) j++; 3895 valid_constraints = k-j; 3896 total_counts = total_counts-temp_constraints+valid_constraints; 3897 #endif /* on missing GESVD */ 3898 } 3899 } 3900 /* update pointers information */ 3901 if (valid_constraints) { 3902 constraints_n[total_counts_cc] = valid_constraints; 3903 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3904 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3905 /* set change_of_basis flag */ 3906 if (boolforchange) { 3907 PetscBTSet(change_basis,total_counts_cc); 3908 } 3909 total_counts_cc++; 3910 } 3911 } 3912 /* free workspace */ 3913 if (!skip_lapack) { 3914 ierr = PetscFree(work);CHKERRQ(ierr); 3915 #if defined(PETSC_USE_COMPLEX) 3916 ierr = PetscFree(rwork);CHKERRQ(ierr); 3917 #endif 3918 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3919 #if defined(PETSC_MISSING_LAPACK_GESVD) 3920 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3921 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3922 #endif 3923 } 3924 for (k=0;k<nnsp_size;k++) { 3925 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3926 } 3927 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3928 /* free index sets of faces, edges and vertices */ 3929 for (i=0;i<n_ISForFaces;i++) { 3930 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3931 } 3932 if (n_ISForFaces) { 3933 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3934 } 3935 for (i=0;i<n_ISForEdges;i++) { 3936 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3937 } 3938 if (n_ISForEdges) { 3939 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3940 } 3941 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3942 } else { 3943 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3944 3945 total_counts = 0; 3946 n_vertices = 0; 3947 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3948 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3949 } 3950 max_constraints = 0; 3951 total_counts_cc = 0; 3952 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3953 total_counts += pcbddc->adaptive_constraints_n[i]; 3954 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3955 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3956 } 3957 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3958 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3959 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3960 constraints_data = pcbddc->adaptive_constraints_data; 3961 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3962 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3963 total_counts_cc = 0; 3964 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3965 if (pcbddc->adaptive_constraints_n[i]) { 3966 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3967 } 3968 } 3969 #if 0 3970 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3971 for (i=0;i<total_counts_cc;i++) { 3972 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3973 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3974 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3975 printf(" %d",constraints_idxs[j]); 3976 } 3977 printf("\n"); 3978 printf("number of cc: %d\n",constraints_n[i]); 3979 } 3980 for (i=0;i<n_vertices;i++) { 3981 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3982 } 3983 for (i=0;i<sub_schurs->n_subs;i++) { 3984 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3985 } 3986 #endif 3987 3988 max_size_of_constraint = 0; 3989 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3990 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3991 /* Change of basis */ 3992 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3993 if (pcbddc->use_change_of_basis) { 3994 for (i=0;i<sub_schurs->n_subs;i++) { 3995 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3996 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3997 } 3998 } 3999 } 4000 } 4001 pcbddc->local_primal_size = total_counts; 4002 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4003 4004 /* map constraints_idxs in boundary numbering */ 4005 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4006 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4007 4008 /* Create constraint matrix */ 4009 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4010 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4011 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4012 4013 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4014 /* determine if a QR strategy is needed for change of basis */ 4015 qr_needed = PETSC_FALSE; 4016 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4017 total_primal_vertices=0; 4018 pcbddc->local_primal_size_cc = 0; 4019 for (i=0;i<total_counts_cc;i++) { 4020 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4021 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4022 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4023 pcbddc->local_primal_size_cc += 1; 4024 } else if (PetscBTLookup(change_basis,i)) { 4025 for (k=0;k<constraints_n[i];k++) { 4026 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4027 } 4028 pcbddc->local_primal_size_cc += constraints_n[i]; 4029 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4030 PetscBTSet(qr_needed_idx,i); 4031 qr_needed = PETSC_TRUE; 4032 } 4033 } else { 4034 pcbddc->local_primal_size_cc += 1; 4035 } 4036 } 4037 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4038 pcbddc->n_vertices = total_primal_vertices; 4039 /* permute indices in order to have a sorted set of vertices */ 4040 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4041 4042 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4043 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4044 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4045 4046 /* nonzero structure of constraint matrix */ 4047 /* and get reference dof for local constraints */ 4048 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4049 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4050 4051 j = total_primal_vertices; 4052 total_counts = total_primal_vertices; 4053 cum = total_primal_vertices; 4054 for (i=n_vertices;i<total_counts_cc;i++) { 4055 if (!PetscBTLookup(change_basis,i)) { 4056 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4057 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4058 cum++; 4059 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4060 for (k=0;k<constraints_n[i];k++) { 4061 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4062 nnz[j+k] = size_of_constraint; 4063 } 4064 j += constraints_n[i]; 4065 } 4066 } 4067 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4068 ierr = PetscFree(nnz);CHKERRQ(ierr); 4069 4070 /* set values in constraint matrix */ 4071 for (i=0;i<total_primal_vertices;i++) { 4072 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4073 } 4074 total_counts = total_primal_vertices; 4075 for (i=n_vertices;i<total_counts_cc;i++) { 4076 if (!PetscBTLookup(change_basis,i)) { 4077 PetscInt *cols; 4078 4079 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4080 cols = constraints_idxs+constraints_idxs_ptr[i]; 4081 for (k=0;k<constraints_n[i];k++) { 4082 PetscInt row = total_counts+k; 4083 PetscScalar *vals; 4084 4085 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4086 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4087 } 4088 total_counts += constraints_n[i]; 4089 } 4090 } 4091 /* assembling */ 4092 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4093 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4094 4095 /* 4096 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4097 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4098 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4099 */ 4100 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4101 if (pcbddc->use_change_of_basis) { 4102 /* dual and primal dofs on a single cc */ 4103 PetscInt dual_dofs,primal_dofs; 4104 /* working stuff for GEQRF */ 4105 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4106 PetscBLASInt lqr_work; 4107 /* working stuff for UNGQR */ 4108 PetscScalar *gqr_work,lgqr_work_t; 4109 PetscBLASInt lgqr_work; 4110 /* working stuff for TRTRS */ 4111 PetscScalar *trs_rhs; 4112 PetscBLASInt Blas_NRHS; 4113 /* pointers for values insertion into change of basis matrix */ 4114 PetscInt *start_rows,*start_cols; 4115 PetscScalar *start_vals; 4116 /* working stuff for values insertion */ 4117 PetscBT is_primal; 4118 PetscInt *aux_primal_numbering_B; 4119 /* matrix sizes */ 4120 PetscInt global_size,local_size; 4121 /* temporary change of basis */ 4122 Mat localChangeOfBasisMatrix; 4123 /* extra space for debugging */ 4124 PetscScalar *dbg_work; 4125 4126 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4127 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4128 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4129 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4130 /* nonzeros for local mat */ 4131 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4132 if (!pcbddc->benign_change || pcbddc->fake_change) { 4133 for (i=0;i<pcis->n;i++) nnz[i]=1; 4134 } else { 4135 const PetscInt *ii; 4136 PetscInt n; 4137 PetscBool flg_row; 4138 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4139 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4140 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4141 } 4142 for (i=n_vertices;i<total_counts_cc;i++) { 4143 if (PetscBTLookup(change_basis,i)) { 4144 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4145 if (PetscBTLookup(qr_needed_idx,i)) { 4146 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4147 } else { 4148 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4149 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4150 } 4151 } 4152 } 4153 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4154 ierr = PetscFree(nnz);CHKERRQ(ierr); 4155 /* Set interior change in the matrix */ 4156 if (!pcbddc->benign_change || pcbddc->fake_change) { 4157 for (i=0;i<pcis->n;i++) { 4158 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4159 } 4160 } else { 4161 const PetscInt *ii,*jj; 4162 PetscScalar *aa; 4163 PetscInt n; 4164 PetscBool flg_row; 4165 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4166 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4167 for (i=0;i<n;i++) { 4168 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4169 } 4170 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4171 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4172 } 4173 4174 if (pcbddc->dbg_flag) { 4175 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4177 } 4178 4179 4180 /* Now we loop on the constraints which need a change of basis */ 4181 /* 4182 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4183 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4184 4185 Basic blocks of change of basis matrix T computed by 4186 4187 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4188 4189 | 1 0 ... 0 s_1/S | 4190 | 0 1 ... 0 s_2/S | 4191 | ... | 4192 | 0 ... 1 s_{n-1}/S | 4193 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4194 4195 with S = \sum_{i=1}^n s_i^2 4196 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4197 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4198 4199 - QR decomposition of constraints otherwise 4200 */ 4201 if (qr_needed) { 4202 /* space to store Q */ 4203 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4204 /* first we issue queries for optimal work */ 4205 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4206 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4207 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4208 lqr_work = -1; 4209 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4210 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4211 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4212 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4213 lgqr_work = -1; 4214 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4215 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4216 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4217 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4218 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4219 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4220 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4221 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4222 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4223 /* array to store scaling factors for reflectors */ 4224 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4225 /* array to store rhs and solution of triangular solver */ 4226 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4227 /* allocating workspace for check */ 4228 if (pcbddc->dbg_flag) { 4229 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4230 } 4231 } 4232 /* array to store whether a node is primal or not */ 4233 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4234 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4235 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4236 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4237 for (i=0;i<total_primal_vertices;i++) { 4238 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4239 } 4240 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4241 4242 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4243 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4244 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4245 if (PetscBTLookup(change_basis,total_counts)) { 4246 /* get constraint info */ 4247 primal_dofs = constraints_n[total_counts]; 4248 dual_dofs = size_of_constraint-primal_dofs; 4249 4250 if (pcbddc->dbg_flag) { 4251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4252 } 4253 4254 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4255 4256 /* copy quadrature constraints for change of basis check */ 4257 if (pcbddc->dbg_flag) { 4258 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4259 } 4260 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4261 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4262 4263 /* compute QR decomposition of constraints */ 4264 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4265 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4268 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4269 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4270 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4271 4272 /* explictly compute R^-T */ 4273 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4274 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4275 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4276 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4278 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4280 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4281 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4282 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4283 4284 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4290 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4291 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4292 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4293 4294 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4295 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4296 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4297 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4299 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4300 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4301 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4303 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4304 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4305 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4306 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4307 4308 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4309 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4310 /* insert cols for primal dofs */ 4311 for (j=0;j<primal_dofs;j++) { 4312 start_vals = &qr_basis[j*size_of_constraint]; 4313 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4314 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4315 } 4316 /* insert cols for dual dofs */ 4317 for (j=0,k=0;j<dual_dofs;k++) { 4318 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4319 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4320 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4321 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4322 j++; 4323 } 4324 } 4325 4326 /* check change of basis */ 4327 if (pcbddc->dbg_flag) { 4328 PetscInt ii,jj; 4329 PetscBool valid_qr=PETSC_TRUE; 4330 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4335 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4336 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4337 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4339 for (jj=0;jj<size_of_constraint;jj++) { 4340 for (ii=0;ii<primal_dofs;ii++) { 4341 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4342 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4343 } 4344 } 4345 if (!valid_qr) { 4346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4347 for (jj=0;jj<size_of_constraint;jj++) { 4348 for (ii=0;ii<primal_dofs;ii++) { 4349 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4350 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4351 } 4352 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4353 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4354 } 4355 } 4356 } 4357 } else { 4358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4359 } 4360 } 4361 } else { /* simple transformation block */ 4362 PetscInt row,col; 4363 PetscScalar val,norm; 4364 4365 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4366 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4367 for (j=0;j<size_of_constraint;j++) { 4368 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4369 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4370 if (!PetscBTLookup(is_primal,row_B)) { 4371 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4372 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4373 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4374 } else { 4375 for (k=0;k<size_of_constraint;k++) { 4376 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4377 if (row != col) { 4378 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4379 } else { 4380 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4381 } 4382 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4383 } 4384 } 4385 } 4386 if (pcbddc->dbg_flag) { 4387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4388 } 4389 } 4390 } else { 4391 if (pcbddc->dbg_flag) { 4392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4393 } 4394 } 4395 } 4396 4397 /* free workspace */ 4398 if (qr_needed) { 4399 if (pcbddc->dbg_flag) { 4400 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4401 } 4402 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4403 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4404 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4405 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4406 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4407 } 4408 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4409 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4410 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4411 4412 /* assembling of global change of variable */ 4413 if (!pcbddc->fake_change) { 4414 Mat tmat; 4415 PetscInt bs; 4416 4417 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4418 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4419 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4420 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4421 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4422 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4423 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4424 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4425 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4426 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4427 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4428 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4429 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4430 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4431 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4432 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4433 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4434 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4435 4436 /* check */ 4437 if (pcbddc->dbg_flag) { 4438 PetscReal error; 4439 Vec x,x_change; 4440 4441 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4442 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4443 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4444 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4445 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4446 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4447 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4448 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4451 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4452 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4453 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4455 ierr = VecDestroy(&x);CHKERRQ(ierr); 4456 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4457 } 4458 /* adapt sub_schurs computed (if any) */ 4459 if (pcbddc->use_deluxe_scaling) { 4460 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4461 4462 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4463 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4464 } 4465 if (sub_schurs && sub_schurs->S_Ej_all) { 4466 Mat S_new,tmat; 4467 IS is_all_N,is_V_Sall = NULL; 4468 4469 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4470 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4471 if (pcbddc->deluxe_zerorows) { 4472 ISLocalToGlobalMapping NtoSall; 4473 IS is_V; 4474 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4475 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4476 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4477 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4478 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4479 } 4480 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4481 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4482 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4483 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4484 if (pcbddc->deluxe_zerorows) { 4485 const PetscScalar *array; 4486 const PetscInt *idxs_V,*idxs_all; 4487 PetscInt i,n_V; 4488 4489 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4490 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4491 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4492 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4493 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4494 for (i=0;i<n_V;i++) { 4495 PetscScalar val; 4496 PetscInt idx; 4497 4498 idx = idxs_V[i]; 4499 val = array[idxs_all[idxs_V[i]]]; 4500 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4501 } 4502 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4503 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4504 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4505 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4506 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4507 } 4508 sub_schurs->S_Ej_all = S_new; 4509 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4510 if (sub_schurs->sum_S_Ej_all) { 4511 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4512 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4513 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4514 if (pcbddc->deluxe_zerorows) { 4515 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4516 } 4517 sub_schurs->sum_S_Ej_all = S_new; 4518 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4519 } 4520 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4521 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4522 } 4523 /* destroy any change of basis context in sub_schurs */ 4524 if (sub_schurs && sub_schurs->change) { 4525 PetscInt i; 4526 4527 for (i=0;i<sub_schurs->n_subs;i++) { 4528 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4529 } 4530 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4531 } 4532 } 4533 if (pcbddc->switch_static) { /* need to save the local change */ 4534 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4535 } else { 4536 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4537 } 4538 /* determine if any process has changed the pressures locally */ 4539 pcbddc->change_interior = pcbddc->benign_have_null; 4540 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4541 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4542 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4543 pcbddc->use_qr_single = qr_needed; 4544 } 4545 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4546 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4547 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4548 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4549 } else { 4550 Mat benign_global = NULL; 4551 if (pcbddc->benign_have_null) { 4552 Mat tmat; 4553 4554 pcbddc->change_interior = PETSC_TRUE; 4555 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4556 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4557 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4558 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4559 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4560 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4561 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4562 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4563 if (pcbddc->benign_change) { 4564 Mat M; 4565 4566 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4567 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4568 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4569 ierr = MatDestroy(&M);CHKERRQ(ierr); 4570 } else { 4571 Mat eye; 4572 PetscScalar *array; 4573 4574 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4575 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4576 for (i=0;i<pcis->n;i++) { 4577 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4578 } 4579 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4580 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4581 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4582 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4583 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4584 } 4585 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4586 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4587 } 4588 if (pcbddc->user_ChangeOfBasisMatrix) { 4589 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4590 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4591 } else if (pcbddc->benign_have_null) { 4592 pcbddc->ChangeOfBasisMatrix = benign_global; 4593 } 4594 } 4595 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4596 IS is_global; 4597 const PetscInt *gidxs; 4598 4599 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4600 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4601 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4602 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4603 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4604 } 4605 } 4606 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4607 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4608 } 4609 4610 if (!pcbddc->fake_change) { 4611 /* add pressure dofs to set of primal nodes for numbering purposes */ 4612 for (i=0;i<pcbddc->benign_n;i++) { 4613 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4614 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4615 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4616 pcbddc->local_primal_size_cc++; 4617 pcbddc->local_primal_size++; 4618 } 4619 4620 /* check if a new primal space has been introduced (also take into account benign trick) */ 4621 pcbddc->new_primal_space_local = PETSC_TRUE; 4622 if (olocal_primal_size == pcbddc->local_primal_size) { 4623 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4624 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4625 if (!pcbddc->new_primal_space_local) { 4626 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4627 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4628 } 4629 } 4630 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4631 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4632 } 4633 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4634 4635 /* flush dbg viewer */ 4636 if (pcbddc->dbg_flag) { 4637 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4638 } 4639 4640 /* free workspace */ 4641 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4642 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4643 if (!pcbddc->adaptive_selection) { 4644 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4645 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4646 } else { 4647 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4648 pcbddc->adaptive_constraints_idxs_ptr, 4649 pcbddc->adaptive_constraints_data_ptr, 4650 pcbddc->adaptive_constraints_idxs, 4651 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4652 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4653 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4654 } 4655 PetscFunctionReturn(0); 4656 } 4657 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4660 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4661 { 4662 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4663 PC_IS *pcis = (PC_IS*)pc->data; 4664 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4665 PetscInt ierr,i,N; 4666 4667 PetscFunctionBegin; 4668 /* Reset previously computed graph */ 4669 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4670 /* Init local Graph struct */ 4671 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4672 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4673 4674 /* Check validity of the csr graph passed in by the user */ 4675 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4676 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4677 } 4678 4679 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4680 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4681 PetscInt *xadj,*adjncy; 4682 PetscInt nvtxs; 4683 PetscBool flg_row=PETSC_FALSE; 4684 4685 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4686 if (flg_row) { 4687 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4688 pcbddc->computed_rowadj = PETSC_TRUE; 4689 } 4690 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4691 } 4692 if (pcbddc->dbg_flag) { 4693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4694 } 4695 4696 /* Setup of Graph */ 4697 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4698 4699 /* attach info on disconnected subdomains if present */ 4700 if (pcbddc->n_local_subs) { 4701 PetscInt *local_subs; 4702 4703 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4704 for (i=0;i<pcbddc->n_local_subs;i++) { 4705 const PetscInt *idxs; 4706 PetscInt nl,j; 4707 4708 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4709 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4710 for (j=0;j<nl;j++) { 4711 local_subs[idxs[j]] = i; 4712 } 4713 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4714 } 4715 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4716 pcbddc->mat_graph->local_subs = local_subs; 4717 } 4718 4719 /* Graph's connected components analysis */ 4720 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4721 PetscFunctionReturn(0); 4722 } 4723 4724 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4725 #undef __FUNCT__ 4726 #define __FUNCT__ "PCBDDCSubsetNumbering" 4727 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4728 { 4729 PetscSF sf; 4730 PetscLayout map; 4731 const PetscInt *idxs; 4732 PetscInt *leaf_data,*root_data,*gidxs; 4733 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4734 PetscInt n_n,nlocals,start,first_index; 4735 PetscMPIInt commsize; 4736 PetscBool first_found; 4737 PetscErrorCode ierr; 4738 4739 PetscFunctionBegin; 4740 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4741 if (subset_mult) { 4742 PetscCheckSameComm(subset,1,subset_mult,2); 4743 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4744 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4745 } 4746 /* create workspace layout for computing global indices of subset */ 4747 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4748 lbounds[0] = lbounds[1] = 0; 4749 for (i=0;i<n;i++) { 4750 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4751 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4752 } 4753 lbounds[0] = -lbounds[0]; 4754 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4755 gbounds[0] = -gbounds[0]; 4756 N = gbounds[1] - gbounds[0] + 1; 4757 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4758 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4759 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4760 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4761 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4762 4763 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4764 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4765 if (subset_mult) { 4766 const PetscInt* idxs_mult; 4767 4768 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4769 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4770 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4771 } else { 4772 for (i=0;i<n;i++) leaf_data[i] = 1; 4773 } 4774 /* local size of new subset */ 4775 n_n = 0; 4776 for (i=0;i<n;i++) n_n += leaf_data[i]; 4777 4778 /* global indexes in layout */ 4779 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4780 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4781 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4782 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4783 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4784 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4785 4786 /* reduce from leaves to roots */ 4787 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4788 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4789 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4790 4791 /* count indexes in local part of layout */ 4792 nlocals = 0; 4793 first_index = -1; 4794 first_found = PETSC_FALSE; 4795 for (i=0;i<Nl;i++) { 4796 if (!first_found && root_data[i]) { 4797 first_found = PETSC_TRUE; 4798 first_index = i; 4799 } 4800 nlocals += root_data[i]; 4801 } 4802 4803 /* cumulative of number of indexes and size of subset without holes */ 4804 #if defined(PETSC_HAVE_MPI_EXSCAN) 4805 start = 0; 4806 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4807 #else 4808 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4809 start = start-nlocals; 4810 #endif 4811 4812 if (N_n) { /* compute total size of new subset if requested */ 4813 *N_n = start + nlocals; 4814 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4815 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4816 } 4817 4818 /* adapt root data with cumulative */ 4819 if (first_found) { 4820 PetscInt old_index; 4821 4822 root_data[first_index] += start; 4823 old_index = first_index; 4824 for (i=first_index+1;i<Nl;i++) { 4825 if (root_data[i]) { 4826 root_data[i] += root_data[old_index]; 4827 old_index = i; 4828 } 4829 } 4830 } 4831 4832 /* from roots to leaves */ 4833 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4834 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4835 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4836 4837 /* create new IS with global indexes without holes */ 4838 if (subset_mult) { 4839 const PetscInt* idxs_mult; 4840 PetscInt cum; 4841 4842 cum = 0; 4843 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4844 for (i=0;i<n;i++) { 4845 PetscInt j; 4846 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4847 } 4848 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4849 } else { 4850 for (i=0;i<n;i++) { 4851 gidxs[i] = leaf_data[i]-1; 4852 } 4853 } 4854 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4855 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4856 PetscFunctionReturn(0); 4857 } 4858 4859 #undef __FUNCT__ 4860 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4861 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4862 { 4863 PetscInt i,j; 4864 PetscScalar *alphas; 4865 PetscErrorCode ierr; 4866 4867 PetscFunctionBegin; 4868 /* this implements stabilized Gram-Schmidt */ 4869 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4870 for (i=0;i<n;i++) { 4871 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4872 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4873 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4874 } 4875 ierr = PetscFree(alphas);CHKERRQ(ierr); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 #undef __FUNCT__ 4880 #define __FUNCT__ "MatISGetSubassemblingPattern" 4881 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4882 { 4883 Mat A; 4884 PetscInt n_neighs,*neighs,*n_shared,**shared; 4885 PetscMPIInt size,rank,color; 4886 PetscInt *xadj,*adjncy; 4887 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4888 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4889 PetscInt void_procs,*procs_candidates = NULL; 4890 PetscInt xadj_count, *count; 4891 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4892 PetscSubcomm psubcomm; 4893 MPI_Comm subcomm; 4894 PetscErrorCode ierr; 4895 4896 PetscFunctionBegin; 4897 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4898 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4899 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4900 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4901 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4902 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4903 4904 if (have_void) *have_void = PETSC_FALSE; 4905 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4906 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4907 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4908 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4909 im_active = !!(n); 4910 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4911 void_procs = size - active_procs; 4912 /* get ranks of of non-active processes in mat communicator */ 4913 if (void_procs) { 4914 PetscInt ncand; 4915 4916 if (have_void) *have_void = PETSC_TRUE; 4917 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4918 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4919 for (i=0,ncand=0;i<size;i++) { 4920 if (!procs_candidates[i]) { 4921 procs_candidates[ncand++] = i; 4922 } 4923 } 4924 /* force n_subdomains to be not greater that the number of non-active processes */ 4925 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4926 } 4927 4928 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4929 if (active_procs < *n_subdomains) { 4930 PetscInt issize,isidx; 4931 if (im_active) { 4932 issize = 1; 4933 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4934 isidx = procs_candidates[rank]; 4935 } else { 4936 isidx = rank; 4937 } 4938 } else { 4939 issize = 0; 4940 isidx = -1; 4941 } 4942 *n_subdomains = active_procs; 4943 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4944 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4945 PetscFunctionReturn(0); 4946 } 4947 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4948 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4949 threshold = PetscMax(threshold,2); 4950 4951 /* Get info on mapping */ 4952 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4953 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4954 4955 /* build local CSR graph of subdomains' connectivity */ 4956 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4957 xadj[0] = 0; 4958 xadj[1] = PetscMax(n_neighs-1,0); 4959 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4960 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4961 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4962 for (i=1;i<n_neighs;i++) 4963 for (j=0;j<n_shared[i];j++) 4964 count[shared[i][j]] += 1; 4965 4966 xadj_count = 0; 4967 for (i=1;i<n_neighs;i++) { 4968 for (j=0;j<n_shared[i];j++) { 4969 if (count[shared[i][j]] < threshold) { 4970 adjncy[xadj_count] = neighs[i]; 4971 adjncy_wgt[xadj_count] = n_shared[i]; 4972 xadj_count++; 4973 break; 4974 } 4975 } 4976 } 4977 xadj[1] = xadj_count; 4978 ierr = PetscFree(count);CHKERRQ(ierr); 4979 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4980 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4981 4982 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4983 4984 /* Restrict work on active processes only */ 4985 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4986 if (void_procs) { 4987 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4988 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4989 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4990 subcomm = PetscSubcommChild(psubcomm); 4991 } else { 4992 psubcomm = NULL; 4993 subcomm = PetscObjectComm((PetscObject)mat); 4994 } 4995 4996 v_wgt = NULL; 4997 if (!color) { 4998 ierr = PetscFree(xadj);CHKERRQ(ierr); 4999 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5000 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5001 } else { 5002 Mat subdomain_adj; 5003 IS new_ranks,new_ranks_contig; 5004 MatPartitioning partitioner; 5005 PetscInt rstart=0,rend=0; 5006 PetscInt *is_indices,*oldranks; 5007 PetscMPIInt size; 5008 PetscBool aggregate; 5009 5010 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5011 if (void_procs) { 5012 PetscInt prank = rank; 5013 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5014 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5015 for (i=0;i<xadj[1];i++) { 5016 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5017 } 5018 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5019 } else { 5020 oldranks = NULL; 5021 } 5022 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5023 if (aggregate) { /* TODO: all this part could be made more efficient */ 5024 PetscInt lrows,row,ncols,*cols; 5025 PetscMPIInt nrank; 5026 PetscScalar *vals; 5027 5028 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5029 lrows = 0; 5030 if (nrank<redprocs) { 5031 lrows = size/redprocs; 5032 if (nrank<size%redprocs) lrows++; 5033 } 5034 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5035 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5036 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5037 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5038 row = nrank; 5039 ncols = xadj[1]-xadj[0]; 5040 cols = adjncy; 5041 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5042 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5043 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5044 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5045 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5046 ierr = PetscFree(xadj);CHKERRQ(ierr); 5047 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5048 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5049 ierr = PetscFree(vals);CHKERRQ(ierr); 5050 if (use_vwgt) { 5051 Vec v; 5052 const PetscScalar *array; 5053 PetscInt nl; 5054 5055 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5056 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5057 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5058 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5059 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5060 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5061 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5062 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5063 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5064 ierr = VecDestroy(&v);CHKERRQ(ierr); 5065 } 5066 } else { 5067 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5068 if (use_vwgt) { 5069 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5070 v_wgt[0] = local_size; 5071 } 5072 } 5073 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5074 5075 /* Partition */ 5076 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5077 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5078 if (v_wgt) { 5079 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5080 } 5081 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5082 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5083 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5084 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5085 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5086 5087 /* renumber new_ranks to avoid "holes" in new set of processors */ 5088 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5089 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5090 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5091 if (!aggregate) { 5092 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5093 #if defined(PETSC_USE_DEBUG) 5094 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5095 #endif 5096 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5097 } else if (oldranks) { 5098 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5099 } else { 5100 ranks_send_to_idx[0] = is_indices[0]; 5101 } 5102 } else { 5103 PetscInt idxs[1]; 5104 PetscMPIInt tag; 5105 MPI_Request *reqs; 5106 5107 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5108 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5109 for (i=rstart;i<rend;i++) { 5110 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5111 } 5112 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5113 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5114 ierr = PetscFree(reqs);CHKERRQ(ierr); 5115 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5116 #if defined(PETSC_USE_DEBUG) 5117 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5118 #endif 5119 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5120 } else if (oldranks) { 5121 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5122 } else { 5123 ranks_send_to_idx[0] = idxs[0]; 5124 } 5125 } 5126 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5127 /* clean up */ 5128 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5129 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5130 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5131 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5132 } 5133 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5134 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5135 5136 /* assemble parallel IS for sends */ 5137 i = 1; 5138 if (!color) i=0; 5139 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5140 PetscFunctionReturn(0); 5141 } 5142 5143 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5144 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "MatISSubassemble" 5147 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5148 { 5149 Mat local_mat; 5150 IS is_sends_internal; 5151 PetscInt rows,cols,new_local_rows; 5152 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5153 PetscBool ismatis,isdense,newisdense,destroy_mat; 5154 ISLocalToGlobalMapping l2gmap; 5155 PetscInt* l2gmap_indices; 5156 const PetscInt* is_indices; 5157 MatType new_local_type; 5158 /* buffers */ 5159 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5160 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5161 PetscInt *recv_buffer_idxs_local; 5162 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5163 /* MPI */ 5164 MPI_Comm comm,comm_n; 5165 PetscSubcomm subcomm; 5166 PetscMPIInt n_sends,n_recvs,commsize; 5167 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5168 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5169 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5170 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5171 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5172 PetscErrorCode ierr; 5173 5174 PetscFunctionBegin; 5175 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5176 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5177 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5178 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5179 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5180 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5181 PetscValidLogicalCollectiveBool(mat,reuse,6); 5182 PetscValidLogicalCollectiveInt(mat,nis,8); 5183 5184 /* further checks */ 5185 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5186 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5187 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5188 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5189 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5190 if (reuse && *mat_n) { 5191 PetscInt mrows,mcols,mnrows,mncols; 5192 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5193 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5194 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5195 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5196 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5197 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5198 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5199 } 5200 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5201 PetscValidLogicalCollectiveInt(mat,bs,0); 5202 5203 /* prepare IS for sending if not provided */ 5204 if (!is_sends) { 5205 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5206 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5207 } else { 5208 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5209 is_sends_internal = is_sends; 5210 } 5211 5212 /* get comm */ 5213 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5214 5215 /* compute number of sends */ 5216 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5217 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5218 5219 /* compute number of receives */ 5220 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5221 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5222 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5223 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5224 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5225 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5226 ierr = PetscFree(iflags);CHKERRQ(ierr); 5227 5228 /* restrict comm if requested */ 5229 subcomm = 0; 5230 destroy_mat = PETSC_FALSE; 5231 if (restrict_comm) { 5232 PetscMPIInt color,subcommsize; 5233 5234 color = 0; 5235 if (restrict_full) { 5236 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5237 } else { 5238 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5239 } 5240 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5241 subcommsize = commsize - subcommsize; 5242 /* check if reuse has been requested */ 5243 if (reuse) { 5244 if (*mat_n) { 5245 PetscMPIInt subcommsize2; 5246 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5247 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5248 comm_n = PetscObjectComm((PetscObject)*mat_n); 5249 } else { 5250 comm_n = PETSC_COMM_SELF; 5251 } 5252 } else { /* MAT_INITIAL_MATRIX */ 5253 PetscMPIInt rank; 5254 5255 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5256 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5257 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5258 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5259 comm_n = PetscSubcommChild(subcomm); 5260 } 5261 /* flag to destroy *mat_n if not significative */ 5262 if (color) destroy_mat = PETSC_TRUE; 5263 } else { 5264 comm_n = comm; 5265 } 5266 5267 /* prepare send/receive buffers */ 5268 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5269 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5270 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5271 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5272 if (nis) { 5273 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5274 } 5275 5276 /* Get data from local matrices */ 5277 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5278 /* TODO: See below some guidelines on how to prepare the local buffers */ 5279 /* 5280 send_buffer_vals should contain the raw values of the local matrix 5281 send_buffer_idxs should contain: 5282 - MatType_PRIVATE type 5283 - PetscInt size_of_l2gmap 5284 - PetscInt global_row_indices[size_of_l2gmap] 5285 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5286 */ 5287 else { 5288 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5289 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5290 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5291 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5292 send_buffer_idxs[1] = i; 5293 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5294 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5295 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5296 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5297 for (i=0;i<n_sends;i++) { 5298 ilengths_vals[is_indices[i]] = len*len; 5299 ilengths_idxs[is_indices[i]] = len+2; 5300 } 5301 } 5302 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5303 /* additional is (if any) */ 5304 if (nis) { 5305 PetscMPIInt psum; 5306 PetscInt j; 5307 for (j=0,psum=0;j<nis;j++) { 5308 PetscInt plen; 5309 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5310 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5311 psum += len+1; /* indices + lenght */ 5312 } 5313 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5314 for (j=0,psum=0;j<nis;j++) { 5315 PetscInt plen; 5316 const PetscInt *is_array_idxs; 5317 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5318 send_buffer_idxs_is[psum] = plen; 5319 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5320 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5321 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5322 psum += plen+1; /* indices + lenght */ 5323 } 5324 for (i=0;i<n_sends;i++) { 5325 ilengths_idxs_is[is_indices[i]] = psum; 5326 } 5327 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5328 } 5329 5330 buf_size_idxs = 0; 5331 buf_size_vals = 0; 5332 buf_size_idxs_is = 0; 5333 for (i=0;i<n_recvs;i++) { 5334 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5335 buf_size_vals += (PetscInt)olengths_vals[i]; 5336 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5337 } 5338 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5339 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5340 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5341 5342 /* get new tags for clean communications */ 5343 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5344 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5345 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5346 5347 /* allocate for requests */ 5348 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5349 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5350 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5351 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5352 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5353 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5354 5355 /* communications */ 5356 ptr_idxs = recv_buffer_idxs; 5357 ptr_vals = recv_buffer_vals; 5358 ptr_idxs_is = recv_buffer_idxs_is; 5359 for (i=0;i<n_recvs;i++) { 5360 source_dest = onodes[i]; 5361 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5362 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5363 ptr_idxs += olengths_idxs[i]; 5364 ptr_vals += olengths_vals[i]; 5365 if (nis) { 5366 source_dest = onodes_is[i]; 5367 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5368 ptr_idxs_is += olengths_idxs_is[i]; 5369 } 5370 } 5371 for (i=0;i<n_sends;i++) { 5372 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5373 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5374 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5375 if (nis) { 5376 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5377 } 5378 } 5379 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5380 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5381 5382 /* assemble new l2g map */ 5383 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5384 ptr_idxs = recv_buffer_idxs; 5385 new_local_rows = 0; 5386 for (i=0;i<n_recvs;i++) { 5387 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5388 ptr_idxs += olengths_idxs[i]; 5389 } 5390 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5391 ptr_idxs = recv_buffer_idxs; 5392 new_local_rows = 0; 5393 for (i=0;i<n_recvs;i++) { 5394 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5395 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5396 ptr_idxs += olengths_idxs[i]; 5397 } 5398 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5399 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5400 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5401 5402 /* infer new local matrix type from received local matrices type */ 5403 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5404 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5405 if (n_recvs) { 5406 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5407 ptr_idxs = recv_buffer_idxs; 5408 for (i=0;i<n_recvs;i++) { 5409 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5410 new_local_type_private = MATAIJ_PRIVATE; 5411 break; 5412 } 5413 ptr_idxs += olengths_idxs[i]; 5414 } 5415 switch (new_local_type_private) { 5416 case MATDENSE_PRIVATE: 5417 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5418 new_local_type = MATSEQAIJ; 5419 bs = 1; 5420 } else { /* if I receive only 1 dense matrix */ 5421 new_local_type = MATSEQDENSE; 5422 bs = 1; 5423 } 5424 break; 5425 case MATAIJ_PRIVATE: 5426 new_local_type = MATSEQAIJ; 5427 bs = 1; 5428 break; 5429 case MATBAIJ_PRIVATE: 5430 new_local_type = MATSEQBAIJ; 5431 break; 5432 case MATSBAIJ_PRIVATE: 5433 new_local_type = MATSEQSBAIJ; 5434 break; 5435 default: 5436 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5437 break; 5438 } 5439 } else { /* by default, new_local_type is seqdense */ 5440 new_local_type = MATSEQDENSE; 5441 bs = 1; 5442 } 5443 5444 /* create MATIS object if needed */ 5445 if (!reuse) { 5446 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5447 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5448 } else { 5449 /* it also destroys the local matrices */ 5450 if (*mat_n) { 5451 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5452 } else { /* this is a fake object */ 5453 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5454 } 5455 } 5456 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5457 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5458 5459 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5460 5461 /* Global to local map of received indices */ 5462 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5463 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5464 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5465 5466 /* restore attributes -> type of incoming data and its size */ 5467 buf_size_idxs = 0; 5468 for (i=0;i<n_recvs;i++) { 5469 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5470 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5471 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5472 } 5473 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5474 5475 /* set preallocation */ 5476 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5477 if (!newisdense) { 5478 PetscInt *new_local_nnz=0; 5479 5480 ptr_idxs = recv_buffer_idxs_local; 5481 if (n_recvs) { 5482 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5483 } 5484 for (i=0;i<n_recvs;i++) { 5485 PetscInt j; 5486 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5487 for (j=0;j<*(ptr_idxs+1);j++) { 5488 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5489 } 5490 } else { 5491 /* TODO */ 5492 } 5493 ptr_idxs += olengths_idxs[i]; 5494 } 5495 if (new_local_nnz) { 5496 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5497 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5498 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5499 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5500 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5501 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5502 } else { 5503 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5504 } 5505 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5506 } else { 5507 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5508 } 5509 5510 /* set values */ 5511 ptr_vals = recv_buffer_vals; 5512 ptr_idxs = recv_buffer_idxs_local; 5513 for (i=0;i<n_recvs;i++) { 5514 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5515 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5516 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5517 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5518 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5519 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5520 } else { 5521 /* TODO */ 5522 } 5523 ptr_idxs += olengths_idxs[i]; 5524 ptr_vals += olengths_vals[i]; 5525 } 5526 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5527 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5528 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5529 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5530 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5531 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5532 5533 #if 0 5534 if (!restrict_comm) { /* check */ 5535 Vec lvec,rvec; 5536 PetscReal infty_error; 5537 5538 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5539 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5540 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5541 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5542 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5543 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5544 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5545 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5546 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5547 } 5548 #endif 5549 5550 /* assemble new additional is (if any) */ 5551 if (nis) { 5552 PetscInt **temp_idxs,*count_is,j,psum; 5553 5554 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5555 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5556 ptr_idxs = recv_buffer_idxs_is; 5557 psum = 0; 5558 for (i=0;i<n_recvs;i++) { 5559 for (j=0;j<nis;j++) { 5560 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5561 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5562 psum += plen; 5563 ptr_idxs += plen+1; /* shift pointer to received data */ 5564 } 5565 } 5566 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5567 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5568 for (i=1;i<nis;i++) { 5569 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5570 } 5571 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5572 ptr_idxs = recv_buffer_idxs_is; 5573 for (i=0;i<n_recvs;i++) { 5574 for (j=0;j<nis;j++) { 5575 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5576 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5577 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5578 ptr_idxs += plen+1; /* shift pointer to received data */ 5579 } 5580 } 5581 for (i=0;i<nis;i++) { 5582 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5583 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5584 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5585 } 5586 ierr = PetscFree(count_is);CHKERRQ(ierr); 5587 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5588 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5589 } 5590 /* free workspace */ 5591 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5592 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5593 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5594 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5595 if (isdense) { 5596 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5597 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5598 } else { 5599 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5600 } 5601 if (nis) { 5602 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5603 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5604 } 5605 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5606 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5607 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5608 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5609 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5610 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5611 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5612 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5613 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5614 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5615 ierr = PetscFree(onodes);CHKERRQ(ierr); 5616 if (nis) { 5617 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5618 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5619 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5620 } 5621 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5622 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5623 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5624 for (i=0;i<nis;i++) { 5625 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5626 } 5627 *mat_n = NULL; 5628 } 5629 PetscFunctionReturn(0); 5630 } 5631 5632 /* temporary hack into ksp private data structure */ 5633 #include <petsc/private/kspimpl.h> 5634 5635 #undef __FUNCT__ 5636 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5637 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5638 { 5639 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5640 PC_IS *pcis = (PC_IS*)pc->data; 5641 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5642 MatNullSpace CoarseNullSpace = NULL; 5643 ISLocalToGlobalMapping coarse_islg; 5644 IS coarse_is,*isarray; 5645 PetscInt i,im_active=-1,active_procs=-1; 5646 PetscInt nis,nisdofs,nisneu,nisvert; 5647 PC pc_temp; 5648 PCType coarse_pc_type; 5649 KSPType coarse_ksp_type; 5650 PetscBool multilevel_requested,multilevel_allowed; 5651 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5652 Mat t_coarse_mat_is; 5653 PetscInt ncoarse; 5654 PetscBool compute_vecs = PETSC_FALSE; 5655 PetscScalar *array; 5656 MatReuse coarse_mat_reuse; 5657 PetscBool restr, full_restr, have_void; 5658 PetscErrorCode ierr; 5659 5660 PetscFunctionBegin; 5661 /* Assign global numbering to coarse dofs */ 5662 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5663 PetscInt ocoarse_size; 5664 compute_vecs = PETSC_TRUE; 5665 ocoarse_size = pcbddc->coarse_size; 5666 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5667 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5668 /* see if we can avoid some work */ 5669 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5670 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5671 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5672 PC pc; 5673 PetscBool isbddc; 5674 5675 /* temporary workaround since PCBDDC does not have a reset method so far */ 5676 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5677 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5678 if (isbddc) { 5679 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5680 } else { 5681 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5682 } 5683 coarse_reuse = PETSC_FALSE; 5684 } else { /* we can safely reuse already computed coarse matrix */ 5685 coarse_reuse = PETSC_TRUE; 5686 } 5687 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5688 coarse_reuse = PETSC_FALSE; 5689 } 5690 /* reset any subassembling information */ 5691 if (!coarse_reuse || pcbddc->recompute_topography) { 5692 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5693 } 5694 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5695 coarse_reuse = PETSC_TRUE; 5696 } 5697 /* assemble coarse matrix */ 5698 if (coarse_reuse && pcbddc->coarse_ksp) { 5699 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5700 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5701 coarse_mat_reuse = MAT_REUSE_MATRIX; 5702 } else { 5703 coarse_mat = NULL; 5704 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5705 } 5706 5707 /* creates temporary l2gmap and IS for coarse indexes */ 5708 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5709 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5710 5711 /* creates temporary MATIS object for coarse matrix */ 5712 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5713 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5714 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5715 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5716 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5717 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5718 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5719 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5720 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5721 5722 /* count "active" (i.e. with positive local size) and "void" processes */ 5723 im_active = !!(pcis->n); 5724 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5725 5726 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5727 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5728 /* full_restr : just use the receivers from the subassembling pattern */ 5729 coarse_mat_is = NULL; 5730 multilevel_allowed = PETSC_FALSE; 5731 multilevel_requested = PETSC_FALSE; 5732 full_restr = PETSC_TRUE; 5733 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5734 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5735 if (multilevel_requested) { 5736 ncoarse = active_procs/pcbddc->coarsening_ratio; 5737 restr = PETSC_FALSE; 5738 full_restr = PETSC_FALSE; 5739 } else { 5740 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5741 restr = PETSC_TRUE; 5742 full_restr = PETSC_TRUE; 5743 } 5744 ncoarse = PetscMax(1,ncoarse); 5745 if (!pcbddc->coarse_subassembling) { 5746 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5747 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5748 PetscInt psum; 5749 PetscMPIInt size; 5750 if (pcbddc->coarse_ksp) psum = 1; 5751 else psum = 0; 5752 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5753 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5754 if (ncoarse < size) have_void = PETSC_TRUE; 5755 } 5756 /* determine if we can go multilevel */ 5757 if (multilevel_requested) { 5758 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5759 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5760 } 5761 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5762 5763 /* dump subassembling pattern */ 5764 if (pcbddc->dbg_flag && multilevel_allowed) { 5765 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5766 } 5767 5768 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5769 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5770 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5771 const PetscInt *idxs; 5772 ISLocalToGlobalMapping tmap; 5773 5774 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5775 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5776 /* allocate space for temporary storage */ 5777 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5778 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5779 /* allocate for IS array */ 5780 nisdofs = pcbddc->n_ISForDofsLocal; 5781 nisneu = !!pcbddc->NeumannBoundariesLocal; 5782 nisvert = 0; /* nisvert is not used */ 5783 nis = nisdofs + nisneu + nisvert; 5784 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5785 /* dofs splitting */ 5786 for (i=0;i<nisdofs;i++) { 5787 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5788 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5789 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5790 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5791 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5792 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5793 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5794 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5795 } 5796 /* neumann boundaries */ 5797 if (pcbddc->NeumannBoundariesLocal) { 5798 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5799 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5800 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5801 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5802 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5803 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5804 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5805 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5806 } 5807 /* free memory */ 5808 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5809 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5810 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5811 } else { 5812 nis = 0; 5813 nisdofs = 0; 5814 nisneu = 0; 5815 nisvert = 0; 5816 isarray = NULL; 5817 } 5818 /* destroy no longer needed map */ 5819 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5820 5821 /* subassemble */ 5822 if (multilevel_allowed) { 5823 PetscBool reuse,reuser; 5824 if (coarse_mat) reuse = PETSC_TRUE; 5825 else reuse = PETSC_FALSE; 5826 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5827 if (reuser) { 5828 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5829 } else { 5830 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5831 } 5832 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5833 } else { 5834 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5835 } 5836 if (coarse_mat_is || coarse_mat) { 5837 PetscMPIInt size; 5838 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5839 if (!multilevel_allowed) { 5840 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5841 } else { 5842 Mat A; 5843 5844 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5845 if (coarse_mat_is) { 5846 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5847 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5848 coarse_mat = coarse_mat_is; 5849 } 5850 /* be sure we don't have MatSeqDENSE as local mat */ 5851 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5852 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5853 } 5854 } 5855 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5856 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5857 5858 /* create local to global scatters for coarse problem */ 5859 if (compute_vecs) { 5860 PetscInt lrows; 5861 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5862 if (coarse_mat) { 5863 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5864 } else { 5865 lrows = 0; 5866 } 5867 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5868 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5869 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5870 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5871 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5872 } 5873 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5874 5875 /* set defaults for coarse KSP and PC */ 5876 if (multilevel_allowed) { 5877 coarse_ksp_type = KSPRICHARDSON; 5878 coarse_pc_type = PCBDDC; 5879 } else { 5880 coarse_ksp_type = KSPPREONLY; 5881 coarse_pc_type = PCREDUNDANT; 5882 } 5883 5884 /* print some info if requested */ 5885 if (pcbddc->dbg_flag) { 5886 if (!multilevel_allowed) { 5887 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5888 if (multilevel_requested) { 5889 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5890 } else if (pcbddc->max_levels) { 5891 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5892 } 5893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5894 } 5895 } 5896 5897 /* create the coarse KSP object only once with defaults */ 5898 if (coarse_mat) { 5899 PetscViewer dbg_viewer = NULL; 5900 if (pcbddc->dbg_flag) { 5901 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5902 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5903 } 5904 if (!pcbddc->coarse_ksp) { 5905 char prefix[256],str_level[16]; 5906 size_t len; 5907 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5908 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5909 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5910 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5911 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5912 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5913 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5914 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5915 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5916 /* prefix */ 5917 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5918 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5919 if (!pcbddc->current_level) { 5920 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5921 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5922 } else { 5923 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5924 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5925 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5926 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5927 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5928 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5929 } 5930 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5931 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5932 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5933 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5934 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5935 /* allow user customization */ 5936 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5937 } 5938 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5939 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5940 if (nisdofs) { 5941 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5942 for (i=0;i<nisdofs;i++) { 5943 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5944 } 5945 } 5946 if (nisneu) { 5947 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5948 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5949 } 5950 if (nisvert) { 5951 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5952 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5953 } 5954 5955 /* get some info after set from options */ 5956 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5957 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5958 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5959 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5960 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5961 isbddc = PETSC_FALSE; 5962 } 5963 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5964 if (isredundant) { 5965 KSP inner_ksp; 5966 PC inner_pc; 5967 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5968 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5969 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5970 } 5971 5972 /* parameters which miss an API */ 5973 if (isbddc) { 5974 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5975 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5976 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5977 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5978 if (pcbddc_coarse->benign_saddle_point) { 5979 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5980 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5981 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5982 } 5983 } 5984 5985 /* propagate symmetry info of coarse matrix */ 5986 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5987 if (pc->pmat->symmetric_set) { 5988 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5989 } 5990 if (pc->pmat->hermitian_set) { 5991 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5992 } 5993 if (pc->pmat->spd_set) { 5994 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5995 } 5996 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5997 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5998 } 5999 /* set operators */ 6000 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6001 if (pcbddc->dbg_flag) { 6002 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6003 } 6004 } 6005 ierr = PetscFree(isarray);CHKERRQ(ierr); 6006 #if 0 6007 { 6008 PetscViewer viewer; 6009 char filename[256]; 6010 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6011 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6012 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6013 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6014 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6015 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6016 } 6017 #endif 6018 6019 if (pcbddc->coarse_ksp) { 6020 Vec crhs,csol; 6021 6022 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6023 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6024 if (!csol) { 6025 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6026 } 6027 if (!crhs) { 6028 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6029 } 6030 } 6031 6032 /* compute null space for coarse solver if the benign trick has been requested */ 6033 if (pcbddc->benign_null) { 6034 6035 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6036 for (i=0;i<pcbddc->benign_n;i++) { 6037 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6038 } 6039 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6040 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6041 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6042 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6043 if (coarse_mat) { 6044 Vec nullv; 6045 PetscScalar *array,*array2; 6046 PetscInt nl; 6047 6048 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6049 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6050 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6051 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6052 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6053 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6054 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6055 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6056 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6057 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6058 } 6059 } 6060 6061 if (pcbddc->coarse_ksp) { 6062 PetscBool ispreonly; 6063 6064 if (CoarseNullSpace) { 6065 PetscBool isnull; 6066 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6067 if (isnull) { 6068 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6069 } 6070 /* TODO: add local nullspaces (if any) */ 6071 } 6072 /* setup coarse ksp */ 6073 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6074 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6075 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6076 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6077 KSP check_ksp; 6078 KSPType check_ksp_type; 6079 PC check_pc; 6080 Vec check_vec,coarse_vec; 6081 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6082 PetscInt its; 6083 PetscBool compute_eigs; 6084 PetscReal *eigs_r,*eigs_c; 6085 PetscInt neigs; 6086 const char *prefix; 6087 6088 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6089 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6090 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6091 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6092 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6093 /* prevent from setup unneeded object */ 6094 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6095 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6096 if (ispreonly) { 6097 check_ksp_type = KSPPREONLY; 6098 compute_eigs = PETSC_FALSE; 6099 } else { 6100 check_ksp_type = KSPGMRES; 6101 compute_eigs = PETSC_TRUE; 6102 } 6103 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6104 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6105 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6106 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6107 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6108 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6109 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6110 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6111 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6112 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6113 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6114 /* create random vec */ 6115 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6116 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6117 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6118 /* solve coarse problem */ 6119 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6120 /* set eigenvalue estimation if preonly has not been requested */ 6121 if (compute_eigs) { 6122 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6123 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6124 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6125 lambda_max = eigs_r[neigs-1]; 6126 lambda_min = eigs_r[0]; 6127 if (pcbddc->use_coarse_estimates) { 6128 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6129 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6130 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6131 } 6132 } 6133 } 6134 6135 /* check coarse problem residual error */ 6136 if (pcbddc->dbg_flag) { 6137 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6138 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6139 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6140 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6141 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6142 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6143 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6144 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6145 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6146 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6147 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6148 if (CoarseNullSpace) { 6149 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6150 } 6151 if (compute_eigs) { 6152 PetscReal lambda_max_s,lambda_min_s; 6153 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6154 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6155 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6156 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6157 for (i=0;i<neigs;i++) { 6158 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6159 } 6160 } 6161 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6162 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6163 } 6164 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6165 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6166 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6167 if (compute_eigs) { 6168 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6169 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6170 } 6171 } 6172 } 6173 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6174 /* print additional info */ 6175 if (pcbddc->dbg_flag) { 6176 /* waits until all processes reaches this point */ 6177 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6178 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6179 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6180 } 6181 6182 /* free memory */ 6183 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6184 PetscFunctionReturn(0); 6185 } 6186 6187 #undef __FUNCT__ 6188 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6189 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6190 { 6191 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6192 PC_IS* pcis = (PC_IS*)pc->data; 6193 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6194 IS subset,subset_mult,subset_n; 6195 PetscInt local_size,coarse_size=0; 6196 PetscInt *local_primal_indices=NULL; 6197 const PetscInt *t_local_primal_indices; 6198 PetscErrorCode ierr; 6199 6200 PetscFunctionBegin; 6201 /* Compute global number of coarse dofs */ 6202 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6203 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6204 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6205 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6206 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6207 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6208 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6209 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6210 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6211 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6212 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6213 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6214 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6215 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6216 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6217 6218 /* check numbering */ 6219 if (pcbddc->dbg_flag) { 6220 PetscScalar coarsesum,*array,*array2; 6221 PetscInt i; 6222 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6223 6224 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6227 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6228 /* counter */ 6229 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6230 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6231 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6232 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6233 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6234 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6235 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6236 for (i=0;i<pcbddc->local_primal_size;i++) { 6237 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6238 } 6239 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6240 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6241 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6242 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6244 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6246 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6247 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6248 for (i=0;i<pcis->n;i++) { 6249 if (array[i] != 0.0 && array[i] != array2[i]) { 6250 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6251 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6252 set_error = PETSC_TRUE; 6253 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6255 } 6256 } 6257 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6258 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6260 for (i=0;i<pcis->n;i++) { 6261 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6262 } 6263 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6264 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6265 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6266 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6267 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6269 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6270 PetscInt *gidxs; 6271 6272 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6273 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6274 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6277 for (i=0;i<pcbddc->local_primal_size;i++) { 6278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6279 } 6280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6281 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6282 } 6283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6284 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6285 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6286 } 6287 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6288 /* get back data */ 6289 *coarse_size_n = coarse_size; 6290 *local_primal_indices_n = local_primal_indices; 6291 PetscFunctionReturn(0); 6292 } 6293 6294 #undef __FUNCT__ 6295 #define __FUNCT__ "PCBDDCGlobalToLocal" 6296 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6297 { 6298 IS localis_t; 6299 PetscInt i,lsize,*idxs,n; 6300 PetscScalar *vals; 6301 PetscErrorCode ierr; 6302 6303 PetscFunctionBegin; 6304 /* get indices in local ordering exploiting local to global map */ 6305 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6306 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6307 for (i=0;i<lsize;i++) vals[i] = 1.0; 6308 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6309 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6310 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6311 if (idxs) { /* multilevel guard */ 6312 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6313 } 6314 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6315 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6316 ierr = PetscFree(vals);CHKERRQ(ierr); 6317 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6318 /* now compute set in local ordering */ 6319 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6320 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6321 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6322 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6323 for (i=0,lsize=0;i<n;i++) { 6324 if (PetscRealPart(vals[i]) > 0.5) { 6325 lsize++; 6326 } 6327 } 6328 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6329 for (i=0,lsize=0;i<n;i++) { 6330 if (PetscRealPart(vals[i]) > 0.5) { 6331 idxs[lsize++] = i; 6332 } 6333 } 6334 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6335 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6336 *localis = localis_t; 6337 PetscFunctionReturn(0); 6338 } 6339 6340 #undef __FUNCT__ 6341 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6342 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6343 { 6344 PC_IS *pcis=(PC_IS*)pc->data; 6345 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6346 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6347 Mat S_j; 6348 PetscInt *used_xadj,*used_adjncy; 6349 PetscBool free_used_adj; 6350 PetscErrorCode ierr; 6351 6352 PetscFunctionBegin; 6353 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6354 free_used_adj = PETSC_FALSE; 6355 if (pcbddc->sub_schurs_layers == -1) { 6356 used_xadj = NULL; 6357 used_adjncy = NULL; 6358 } else { 6359 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6360 used_xadj = pcbddc->mat_graph->xadj; 6361 used_adjncy = pcbddc->mat_graph->adjncy; 6362 } else if (pcbddc->computed_rowadj) { 6363 used_xadj = pcbddc->mat_graph->xadj; 6364 used_adjncy = pcbddc->mat_graph->adjncy; 6365 } else { 6366 PetscBool flg_row=PETSC_FALSE; 6367 const PetscInt *xadj,*adjncy; 6368 PetscInt nvtxs; 6369 6370 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6371 if (flg_row) { 6372 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6373 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6374 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6375 free_used_adj = PETSC_TRUE; 6376 } else { 6377 pcbddc->sub_schurs_layers = -1; 6378 used_xadj = NULL; 6379 used_adjncy = NULL; 6380 } 6381 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6382 } 6383 } 6384 6385 /* setup sub_schurs data */ 6386 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6387 if (!sub_schurs->schur_explicit) { 6388 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6389 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6390 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6391 } else { 6392 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6393 PetscBool isseqaij,need_change = PETSC_FALSE;; 6394 PetscInt benign_n; 6395 Mat change = NULL; 6396 Vec scaling = NULL; 6397 IS change_primal = NULL; 6398 6399 if (!pcbddc->use_vertices && reuse_solvers) { 6400 PetscInt n_vertices; 6401 6402 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6403 reuse_solvers = (PetscBool)!n_vertices; 6404 } 6405 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6406 if (!isseqaij) { 6407 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6408 if (matis->A == pcbddc->local_mat) { 6409 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6410 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6411 } else { 6412 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6413 } 6414 } 6415 if (!pcbddc->benign_change_explicit) { 6416 benign_n = pcbddc->benign_n; 6417 } else { 6418 benign_n = 0; 6419 } 6420 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6421 We need a global reduction to avoid possible deadlocks. 6422 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6423 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6424 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6425 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6426 need_change = (PetscBool)(!need_change); 6427 } 6428 /* If the user defines additional constraints, we import them here. 6429 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6430 if (need_change) { 6431 PC_IS *pcisf; 6432 PC_BDDC *pcbddcf; 6433 PC pcf; 6434 6435 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6436 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6437 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6438 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6439 /* hacks */ 6440 pcisf = (PC_IS*)pcf->data; 6441 pcisf->is_B_local = pcis->is_B_local; 6442 pcisf->vec1_N = pcis->vec1_N; 6443 pcisf->BtoNmap = pcis->BtoNmap; 6444 pcisf->n = pcis->n; 6445 pcisf->n_B = pcis->n_B; 6446 pcbddcf = (PC_BDDC*)pcf->data; 6447 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6448 pcbddcf->mat_graph = pcbddc->mat_graph; 6449 pcbddcf->use_faces = PETSC_TRUE; 6450 pcbddcf->use_change_of_basis = PETSC_TRUE; 6451 pcbddcf->use_change_on_faces = PETSC_TRUE; 6452 pcbddcf->use_qr_single = PETSC_TRUE; 6453 pcbddcf->fake_change = PETSC_TRUE; 6454 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6455 /* store information on primal vertices and change of basis (in local numbering) */ 6456 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6457 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6458 change = pcbddcf->ConstraintMatrix; 6459 pcbddcf->ConstraintMatrix = NULL; 6460 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6461 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6462 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6463 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6464 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6465 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6466 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6467 pcf->ops->destroy = NULL; 6468 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6469 } 6470 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6471 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6472 ierr = MatDestroy(&change);CHKERRQ(ierr); 6473 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6474 } 6475 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6476 6477 /* free adjacency */ 6478 if (free_used_adj) { 6479 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6480 } 6481 PetscFunctionReturn(0); 6482 } 6483 6484 #undef __FUNCT__ 6485 #define __FUNCT__ "PCBDDCInitSubSchurs" 6486 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6487 { 6488 PC_IS *pcis=(PC_IS*)pc->data; 6489 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6490 PCBDDCGraph graph; 6491 PetscErrorCode ierr; 6492 6493 PetscFunctionBegin; 6494 /* attach interface graph for determining subsets */ 6495 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6496 IS verticesIS,verticescomm; 6497 PetscInt vsize,*idxs; 6498 6499 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6500 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6501 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6502 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6503 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6504 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6505 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6506 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6507 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6508 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6509 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6510 } else { 6511 graph = pcbddc->mat_graph; 6512 } 6513 /* print some info */ 6514 if (pcbddc->dbg_flag) { 6515 IS vertices; 6516 PetscInt nv,nedges,nfaces; 6517 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6518 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6519 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6520 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6521 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6527 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6528 } 6529 6530 /* sub_schurs init */ 6531 if (!pcbddc->sub_schurs) { 6532 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6533 } 6534 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6535 6536 /* free graph struct */ 6537 if (pcbddc->sub_schurs_rebuild) { 6538 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6539 } 6540 PetscFunctionReturn(0); 6541 } 6542 6543 #undef __FUNCT__ 6544 #define __FUNCT__ "PCBDDCCheckOperator" 6545 PetscErrorCode PCBDDCCheckOperator(PC pc) 6546 { 6547 PC_IS *pcis=(PC_IS*)pc->data; 6548 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6549 PetscErrorCode ierr; 6550 6551 PetscFunctionBegin; 6552 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6553 IS zerodiag = NULL; 6554 Mat S_j,B0_B=NULL; 6555 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6556 PetscScalar *p0_check,*array,*array2; 6557 PetscReal norm; 6558 PetscInt i; 6559 6560 /* B0 and B0_B */ 6561 if (zerodiag) { 6562 IS dummy; 6563 6564 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6565 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6566 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6567 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6568 } 6569 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6570 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6571 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6572 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6573 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6574 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6575 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6576 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6577 /* S_j */ 6578 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6579 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6580 6581 /* mimic vector in \widetilde{W}_\Gamma */ 6582 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6583 /* continuous in primal space */ 6584 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6585 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6586 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6587 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6588 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6589 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6590 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6591 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6592 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6593 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6594 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6595 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6596 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6597 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6598 6599 /* assemble rhs for coarse problem */ 6600 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6601 /* local with Schur */ 6602 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6603 if (zerodiag) { 6604 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6605 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6606 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6607 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6608 } 6609 /* sum on primal nodes the local contributions */ 6610 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6611 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6612 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6613 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6614 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6615 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6616 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6617 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6618 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6619 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6620 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6621 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6622 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6623 /* scale primal nodes (BDDC sums contibutions) */ 6624 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6625 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6626 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6627 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6628 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6629 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6630 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6631 /* global: \widetilde{B0}_B w_\Gamma */ 6632 if (zerodiag) { 6633 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6634 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6635 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6636 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6637 } 6638 /* BDDC */ 6639 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6640 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6641 6642 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6643 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6644 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6645 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6646 for (i=0;i<pcbddc->benign_n;i++) { 6647 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6648 } 6649 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6650 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6651 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6652 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6653 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6654 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6655 } 6656 PetscFunctionReturn(0); 6657 } 6658