1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 36 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 37 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 38 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 39 } 40 } 41 } 42 43 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 44 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 45 } 46 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 51 } 52 ierr = VecDestroy(&global);CHKERRQ(ierr); 53 ierr = VecDestroy(&local);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 #undef __FUNCT__ 58 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 59 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 60 { 61 PC_IS *pcis = (PC_IS*)(pc->data); 62 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 if (!pcbddc->benign_have_null) { 67 PetscFunctionReturn(0); 68 } 69 if (pcbddc->ChangeOfBasisMatrix) { 70 Vec swap; 71 72 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 73 swap = pcbddc->work_change; 74 pcbddc->work_change = r; 75 r = swap; 76 } 77 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79 if (pcbddc->benign_n) { 80 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 81 } else { 82 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 83 } 84 ierr = VecSet(z,0.);CHKERRQ(ierr); 85 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 86 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 87 if (pcbddc->ChangeOfBasisMatrix) { 88 Vec swap; 89 90 swap = r; 91 r = pcbddc->work_change; 92 pcbddc->work_change = swap; 93 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 94 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 101 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 102 { 103 PCBDDCBenignMatMult_ctx ctx; 104 PetscErrorCode ierr; 105 PetscBool apply_right,apply_left,reset_x; 106 107 PetscFunctionBegin; 108 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 109 if (transpose) { 110 apply_right = ctx->apply_left; 111 apply_left = ctx->apply_right; 112 } else { 113 apply_right = ctx->apply_right; 114 apply_left = ctx->apply_left; 115 } 116 reset_x = PETSC_FALSE; 117 if (apply_right) { 118 const PetscScalar *ax; 119 PetscInt nl,i; 120 121 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 122 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 123 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 125 for (i=0;i<ctx->benign_n;i++) { 126 PetscScalar sum,val; 127 const PetscInt *idxs; 128 PetscInt nz,j; 129 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 130 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 131 sum = 0.; 132 if (ctx->apply_p0) { 133 val = ctx->work[idxs[nz-1]]; 134 for (j=0;j<nz-1;j++) { 135 sum += ctx->work[idxs[j]]; 136 ctx->work[idxs[j]] += val; 137 } 138 } else { 139 for (j=0;j<nz-1;j++) { 140 sum += ctx->work[idxs[j]]; 141 } 142 } 143 ctx->work[idxs[nz-1]] -= sum; 144 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 145 } 146 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 147 reset_x = PETSC_TRUE; 148 } 149 if (transpose) { 150 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 151 } else { 152 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 153 } 154 if (reset_x) { 155 ierr = VecResetArray(x);CHKERRQ(ierr); 156 } 157 if (apply_left) { 158 PetscScalar *ay; 159 PetscInt i; 160 161 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 162 for (i=0;i<ctx->benign_n;i++) { 163 PetscScalar sum,val; 164 const PetscInt *idxs; 165 PetscInt nz,j; 166 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 167 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 168 val = -ay[idxs[nz-1]]; 169 if (ctx->apply_p0) { 170 sum = 0.; 171 for (j=0;j<nz-1;j++) { 172 sum += ay[idxs[j]]; 173 ay[idxs[j]] += val; 174 } 175 ay[idxs[nz-1]] += sum; 176 } else { 177 for (j=0;j<nz-1;j++) { 178 ay[idxs[j]] += val; 179 } 180 ay[idxs[nz-1]] = 0.; 181 } 182 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 183 } 184 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 191 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 192 { 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 202 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 203 { 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCBDDCBenignShellMat" 213 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 214 { 215 PC_IS *pcis = (PC_IS*)pc->data; 216 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 217 PCBDDCBenignMatMult_ctx ctx; 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 if (!restore) { 222 Mat A_IB,A_BI; 223 PetscScalar *work; 224 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 225 226 if (pcbddc->benign_original_mat) { 227 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 228 } 229 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 230 PetscFunctionReturn(0); 231 } 232 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 233 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 234 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 235 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 236 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 237 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 238 ierr = PetscNew(&ctx);CHKERRQ(ierr); 239 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 240 ctx->apply_left = PETSC_TRUE; 241 ctx->apply_right = PETSC_FALSE; 242 ctx->apply_p0 = PETSC_FALSE; 243 ctx->benign_n = pcbddc->benign_n; 244 if (reuse) { 245 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 246 ctx->free = PETSC_FALSE; 247 } else { /* TODO: could be optimized for successive solves */ 248 ISLocalToGlobalMapping N_to_D; 249 PetscInt i; 250 251 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 252 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 253 for (i=0;i<pcbddc->benign_n;i++) { 254 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 255 } 256 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 257 ctx->free = PETSC_TRUE; 258 } 259 ctx->A = pcis->A_IB; 260 ctx->work = work; 261 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 262 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 263 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 264 pcis->A_IB = A_IB; 265 266 /* A_BI as A_IB^T */ 267 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 268 pcbddc->benign_original_mat = pcis->A_BI; 269 pcis->A_BI = A_BI; 270 } else { 271 if (!pcbddc->benign_original_mat) { 272 PetscFunctionReturn(0); 273 } 274 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 275 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 276 pcis->A_IB = ctx->A; 277 ctx->A = NULL; 278 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 279 pcis->A_BI = pcbddc->benign_original_mat; 280 pcbddc->benign_original_mat = NULL; 281 if (ctx->free) { 282 PetscInt i; 283 for (i=0;i<ctx->benign_n;i++) { 284 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 285 } 286 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 287 } 288 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 289 ierr = PetscFree(ctx);CHKERRQ(ierr); 290 } 291 PetscFunctionReturn(0); 292 } 293 294 /* used just in bddc debug mode */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCBenignProject" 297 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 298 { 299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 300 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 301 Mat An; 302 PetscErrorCode ierr; 303 304 PetscFunctionBegin; 305 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 306 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 307 if (is1) { 308 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 309 ierr = MatDestroy(&An);CHKERRQ(ierr); 310 } else { 311 *B = An; 312 } 313 PetscFunctionReturn(0); 314 } 315 316 /* TODO: add reuse flag */ 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatSeqAIJCompress" 319 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 320 { 321 Mat Bt; 322 PetscScalar *a,*bdata; 323 const PetscInt *ii,*ij; 324 PetscInt m,n,i,nnz,*bii,*bij; 325 PetscBool flg_row; 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 330 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 331 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 332 nnz = n; 333 for (i=0;i<ii[n];i++) { 334 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 335 } 336 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 337 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 338 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 339 nnz = 0; 340 bii[0] = 0; 341 for (i=0;i<n;i++) { 342 PetscInt j; 343 for (j=ii[i];j<ii[i+1];j++) { 344 PetscScalar entry = a[j]; 345 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 346 bij[nnz] = ij[j]; 347 bdata[nnz] = entry; 348 nnz++; 349 } 350 } 351 bii[i+1] = nnz; 352 } 353 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 354 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 355 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 356 { 357 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 358 b->free_a = PETSC_TRUE; 359 b->free_ij = PETSC_TRUE; 360 } 361 *B = Bt; 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatDetectDisconnectedComponents" 367 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 368 { 369 Mat B; 370 IS is_dummy,*cc_n; 371 ISLocalToGlobalMapping l2gmap_dummy; 372 PCBDDCGraph graph; 373 PetscInt i,n; 374 PetscInt *xadj,*adjncy; 375 PetscInt *xadj_filtered,*adjncy_filtered; 376 PetscBool flg_row,isseqaij; 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 if (!A->rmap->N || !A->cmap->N) { 381 *ncc = 0; 382 *cc = NULL; 383 PetscFunctionReturn(0); 384 } 385 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 386 if (!isseqaij && filter) { 387 PetscBool isseqdense; 388 389 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 390 if (!isseqdense) { 391 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 392 } else { /* TODO: rectangular case and LDA */ 393 PetscScalar *array; 394 PetscReal chop=1.e-6; 395 396 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 398 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 399 for (i=0;i<n;i++) { 400 PetscInt j; 401 for (j=i+1;j<n;j++) { 402 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 403 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 404 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 405 } 406 } 407 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 408 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 409 } 410 } else { 411 B = A; 412 } 413 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 414 415 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 416 if (filter) { 417 PetscScalar *data; 418 PetscInt j,cum; 419 420 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 421 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 422 cum = 0; 423 for (i=0;i<n;i++) { 424 PetscInt t; 425 426 for (j=xadj[i];j<xadj[i+1];j++) { 427 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 428 continue; 429 } 430 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 431 } 432 t = xadj_filtered[i]; 433 xadj_filtered[i] = cum; 434 cum += t; 435 } 436 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 437 } else { 438 xadj_filtered = NULL; 439 adjncy_filtered = NULL; 440 } 441 442 /* compute local connected components using PCBDDCGraph */ 443 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 444 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 445 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 446 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 447 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 448 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 449 if (xadj_filtered) { 450 graph->xadj = xadj_filtered; 451 graph->adjncy = adjncy_filtered; 452 } else { 453 graph->xadj = xadj; 454 graph->adjncy = adjncy; 455 } 456 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 457 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 458 /* partial clean up */ 459 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 460 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 461 if (A != B) { 462 ierr = MatDestroy(&B);CHKERRQ(ierr); 463 } 464 465 /* get back data */ 466 if (ncc) *ncc = graph->ncc; 467 if (cc) { 468 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 469 for (i=0;i<graph->ncc;i++) { 470 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 471 } 472 *cc = cc_n; 473 } 474 /* clean up graph */ 475 graph->xadj = 0; 476 graph->adjncy = 0; 477 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCBenignCheck" 483 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 484 { 485 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 486 PC_IS* pcis = (PC_IS*)(pc->data); 487 IS dirIS = NULL; 488 PetscInt i; 489 PetscErrorCode ierr; 490 491 PetscFunctionBegin; 492 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 493 if (zerodiag) { 494 Mat A; 495 Vec vec3_N; 496 PetscScalar *vals; 497 const PetscInt *idxs; 498 PetscInt nz,*count; 499 500 /* p0 */ 501 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 502 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 503 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 504 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 505 for (i=0;i<nz;i++) vals[i] = 1.; 506 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 508 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 509 /* v_I */ 510 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 511 for (i=0;i<nz;i++) vals[i] = 0.; 512 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 514 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 515 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 516 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 if (dirIS) { 519 PetscInt n; 520 521 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 522 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 523 for (i=0;i<n;i++) vals[i] = 0.; 524 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 525 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 526 } 527 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 529 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 530 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 531 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 532 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 533 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 534 if (PetscAbsScalar(vals[0]) > 1.e-1) { 535 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 536 } 537 ierr = PetscFree(vals);CHKERRQ(ierr); 538 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 539 540 /* there should not be any pressure dofs lying on the interface */ 541 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 542 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 543 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 544 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 545 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 546 for (i=0;i<nz;i++) { 547 if (count[idxs[i]]) { 548 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 549 } 550 } 551 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 552 ierr = PetscFree(count);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 555 556 /* check PCBDDCBenignGetOrSetP0 */ 557 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 558 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 559 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 560 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 561 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 562 for (i=0;i<pcbddc->benign_n;i++) { 563 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 564 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 572 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 573 { 574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 575 IS pressures,zerodiag,*zerodiag_subs; 576 PetscInt nz,n; 577 PetscInt *interior_dofs,n_interior_dofs; 578 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 579 PetscErrorCode ierr; 580 581 PetscFunctionBegin; 582 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 584 for (n=0;n<pcbddc->benign_n;n++) { 585 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 586 } 587 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 588 pcbddc->benign_n = 0; 589 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 590 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 591 Checks if all the pressure dofs in each subdomain have a zero diagonal 592 If not, a change of basis on pressures is not needed 593 since the local Schur complements are SPD 594 */ 595 has_null_pressures = PETSC_TRUE; 596 have_null = PETSC_TRUE; 597 if (pcbddc->n_ISForDofsLocal) { 598 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 599 600 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 601 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 602 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 603 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 605 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 606 if (!sorted) { 607 ierr = ISSort(pressures);CHKERRQ(ierr); 608 } 609 } else { 610 pressures = NULL; 611 } 612 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 613 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 614 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 615 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 616 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 617 if (!sorted) { 618 ierr = ISSort(zerodiag);CHKERRQ(ierr); 619 } 620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 621 if (!nz) { 622 if (n) have_null = PETSC_FALSE; 623 has_null_pressures = PETSC_FALSE; 624 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 625 } 626 recompute_zerodiag = PETSC_FALSE; 627 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 628 zerodiag_subs = NULL; 629 pcbddc->benign_n = 0; 630 n_interior_dofs = 0; 631 interior_dofs = NULL; 632 if (pcbddc->current_level) { /* need to compute interior nodes */ 633 PetscInt n,i,j; 634 PetscInt n_neigh,*neigh,*n_shared,**shared; 635 PetscInt *iwork; 636 637 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 638 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 639 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 640 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 641 for (i=0;i<n_neigh;i++) 642 for (j=0;j<n_shared[i];j++) 643 iwork[shared[i][j]] += 1; 644 for (i=0;i<n;i++) 645 if (!iwork[i]) 646 interior_dofs[n_interior_dofs++] = i; 647 ierr = PetscFree(iwork);CHKERRQ(ierr); 648 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 649 } 650 if (has_null_pressures) { 651 IS *subs; 652 PetscInt nsubs,i,j,nl; 653 const PetscInt *idxs; 654 PetscScalar *array; 655 Vec *work; 656 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 657 658 subs = pcbddc->local_subs; 659 nsubs = pcbddc->n_local_subs; 660 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 661 if (pcbddc->current_level) { 662 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 663 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 664 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 665 /* work[0] = 1_p */ 666 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 667 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 668 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 669 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 670 /* work[0] = 1_v */ 671 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 672 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 673 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 674 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 675 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 676 } 677 if (nsubs > 1) { 678 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 679 for (i=0;i<nsubs;i++) { 680 ISLocalToGlobalMapping l2g; 681 IS t_zerodiag_subs; 682 PetscInt nl; 683 684 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 685 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 686 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 687 if (nl) { 688 PetscBool valid = PETSC_TRUE; 689 690 if (pcbddc->current_level) { 691 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 692 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 693 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 694 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 695 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 696 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 697 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 698 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 700 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 701 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 702 for (j=0;j<n_interior_dofs;j++) { 703 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 704 valid = PETSC_FALSE; 705 break; 706 } 707 } 708 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 709 } 710 if (valid && pcbddc->NeumannBoundariesLocal) { 711 IS t_bc; 712 PetscInt nzb; 713 714 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 715 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 716 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 717 if (nzb) valid = PETSC_FALSE; 718 } 719 if (valid && pressures) { 720 IS t_pressure_subs; 721 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 722 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 723 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 724 } 725 if (valid) { 726 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 727 pcbddc->benign_n++; 728 } else { 729 recompute_zerodiag = PETSC_TRUE; 730 } 731 } 732 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 733 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 734 } 735 } else { /* there's just one subdomain (or zero if they have not been detected */ 736 PetscBool valid = PETSC_TRUE; 737 738 if (pcbddc->NeumannBoundariesLocal) { 739 PetscInt nzb; 740 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 741 if (nzb) valid = PETSC_FALSE; 742 } 743 if (valid && pressures) { 744 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 745 } 746 if (valid && pcbddc->current_level) { 747 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 748 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 749 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 750 for (j=0;j<n_interior_dofs;j++) { 751 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 752 valid = PETSC_FALSE; 753 break; 754 } 755 } 756 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 757 } 758 if (valid) { 759 pcbddc->benign_n = 1; 760 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 761 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 762 zerodiag_subs[0] = zerodiag; 763 } 764 } 765 if (pcbddc->current_level) { 766 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 767 } 768 } 769 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 770 771 if (!pcbddc->benign_n) { 772 PetscInt n; 773 774 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 775 recompute_zerodiag = PETSC_FALSE; 776 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 777 if (n) { 778 has_null_pressures = PETSC_FALSE; 779 have_null = PETSC_FALSE; 780 } 781 } 782 783 /* final check for null pressures */ 784 if (zerodiag && pressures) { 785 PetscInt nz,np; 786 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 787 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 788 if (nz != np) have_null = PETSC_FALSE; 789 } 790 791 if (recompute_zerodiag) { 792 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 793 if (pcbddc->benign_n == 1) { 794 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 795 zerodiag = zerodiag_subs[0]; 796 } else { 797 PetscInt i,nzn,*new_idxs; 798 799 nzn = 0; 800 for (i=0;i<pcbddc->benign_n;i++) { 801 PetscInt ns; 802 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 803 nzn += ns; 804 } 805 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 806 nzn = 0; 807 for (i=0;i<pcbddc->benign_n;i++) { 808 PetscInt ns,*idxs; 809 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 810 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 811 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 812 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 813 nzn += ns; 814 } 815 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 817 } 818 have_null = PETSC_FALSE; 819 } 820 821 /* no-net-flux */ 822 if (pcbddc->benign_compute_nonetflux) { 823 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 824 MatNullSpace near_null_space; 825 ISLocalToGlobalMapping rmap; 826 Vec quad_vec; 827 PetscScalar *pvals; 828 PetscInt i,np,*dummyins; 829 IS isused = NULL; 830 PetscBool participate = PETSC_TRUE; 831 832 /* create vector to hold quadrature weights */ 833 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 834 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 835 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 836 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 837 838 /* compute B^{(i)T} * 1_p */ 839 np = 0; 840 if (pressures) { 841 isused = pressures; 842 } else { 843 isused = zerodiag; 844 } 845 if (isused) { 846 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 847 } 848 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 849 for (i=0;i<np;i++) pvals[i] = 1.; 850 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 851 if (np) { 852 const PetscInt *pidxs; 853 854 if (isused) { 855 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 856 } 857 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 858 if (isused) { 859 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 860 } 861 } 862 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 863 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 864 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 865 ierr = PetscFree(pvals);CHKERRQ(ierr); 866 if (!isused) participate = PETSC_FALSE; 867 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 868 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 869 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 870 for (i=0;i<matis->sf->nleaves;i++) 871 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 872 else matis->sf_leafdata[i] = -1; 873 874 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 875 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 876 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 877 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 878 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 879 for (i=0;i<matis->sf->nleaves;i++) { 880 if (PetscGlobalRank != matis->sf_leafdata[i]) { 881 pvals[i] = 0.; 882 } 883 } 884 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 885 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 886 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 887 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 888 ierr = PetscFree(dummyins);CHKERRQ(ierr); 889 890 /* assembly quadrature vec and attach near null space to pmat */ 891 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 892 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 893 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 894 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 895 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 896 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 897 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 898 } 899 900 /* change of basis and p0 dofs */ 901 if (has_null_pressures) { 902 IS zerodiagc; 903 const PetscInt *idxs,*idxsc; 904 PetscInt i,s,*nnz; 905 906 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 907 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 908 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 909 /* local change of basis for pressures */ 910 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 911 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 912 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 913 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 914 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 915 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 916 for (i=0;i<pcbddc->benign_n;i++) { 917 PetscInt nzs,j; 918 919 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 920 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 921 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 922 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 923 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 } 925 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 926 ierr = PetscFree(nnz);CHKERRQ(ierr); 927 /* set identity on velocities */ 928 for (i=0;i<n-nz;i++) { 929 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 930 } 931 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 932 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 933 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 934 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 935 /* set change on pressures */ 936 for (s=0;s<pcbddc->benign_n;s++) { 937 PetscScalar *array; 938 PetscInt nzs; 939 940 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 941 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 942 for (i=0;i<nzs-1;i++) { 943 PetscScalar vals[2]; 944 PetscInt cols[2]; 945 946 cols[0] = idxs[i]; 947 cols[1] = idxs[nzs-1]; 948 vals[0] = 1.; 949 vals[1] = 1.; 950 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 951 } 952 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 953 for (i=0;i<nzs-1;i++) array[i] = -1.; 954 array[nzs-1] = 1.; 955 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 956 /* store local idxs for p0 */ 957 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 958 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 959 ierr = PetscFree(array);CHKERRQ(ierr); 960 } 961 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 963 /* project if needed */ 964 if (pcbddc->benign_change_explicit) { 965 Mat M; 966 967 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 968 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 969 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 970 ierr = MatDestroy(&M);CHKERRQ(ierr); 971 } 972 /* store global idxs for p0 */ 973 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 974 } 975 pcbddc->benign_zerodiag_subs = zerodiag_subs; 976 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 977 978 /* determines if the coarse solver will be singular or not */ 979 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 980 /* determines if the problem has subdomains with 0 pressure block */ 981 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 982 *zerodiaglocal = zerodiag; 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 988 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 989 { 990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 991 PetscScalar *array; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 if (!pcbddc->benign_sf) { 996 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 997 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 998 } 999 if (get) { 1000 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1001 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1002 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1003 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 } else { 1005 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1006 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1007 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1015 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1016 { 1017 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 /* TODO: add error checking 1022 - avoid nested pop (or push) calls. 1023 - cannot push before pop. 1024 - cannot call this if pcbddc->local_mat is NULL 1025 */ 1026 if (!pcbddc->benign_n) { 1027 PetscFunctionReturn(0); 1028 } 1029 if (pop) { 1030 if (pcbddc->benign_change_explicit) { 1031 IS is_p0; 1032 MatReuse reuse; 1033 1034 /* extract B_0 */ 1035 reuse = MAT_INITIAL_MATRIX; 1036 if (pcbddc->benign_B0) { 1037 reuse = MAT_REUSE_MATRIX; 1038 } 1039 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1041 /* remove rows and cols from local problem */ 1042 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1043 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1044 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1045 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1046 } else { 1047 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1048 PetscScalar *vals; 1049 PetscInt i,n,*idxs_ins; 1050 1051 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1052 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1053 if (!pcbddc->benign_B0) { 1054 PetscInt *nnz; 1055 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1056 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1057 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1058 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1059 for (i=0;i<pcbddc->benign_n;i++) { 1060 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1061 nnz[i] = n - nnz[i]; 1062 } 1063 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1064 ierr = PetscFree(nnz);CHKERRQ(ierr); 1065 } 1066 1067 for (i=0;i<pcbddc->benign_n;i++) { 1068 PetscScalar *array; 1069 PetscInt *idxs,j,nz,cum; 1070 1071 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1072 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1073 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1074 for (j=0;j<nz;j++) vals[j] = 1.; 1075 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1076 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1077 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1078 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1079 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1080 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1081 cum = 0; 1082 for (j=0;j<n;j++) { 1083 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1084 vals[cum] = array[j]; 1085 idxs_ins[cum] = j; 1086 cum++; 1087 } 1088 } 1089 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1091 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1092 } 1093 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1095 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1096 } 1097 } else { /* push */ 1098 if (pcbddc->benign_change_explicit) { 1099 PetscInt i; 1100 1101 for (i=0;i<pcbddc->benign_n;i++) { 1102 PetscScalar *B0_vals; 1103 PetscInt *B0_cols,B0_ncol; 1104 1105 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1106 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1107 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1108 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1109 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1110 } 1111 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1112 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 } else { 1114 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1115 } 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1122 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1123 { 1124 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1125 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1126 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1127 PetscBLASInt *B_iwork,*B_ifail; 1128 PetscScalar *work,lwork; 1129 PetscScalar *St,*S,*eigv; 1130 PetscScalar *Sarray,*Starray; 1131 PetscReal *eigs,thresh; 1132 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1133 PetscBool allocated_S_St; 1134 #if defined(PETSC_USE_COMPLEX) 1135 PetscReal *rwork; 1136 #endif 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1141 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1142 1143 if (pcbddc->dbg_flag) { 1144 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1147 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1148 } 1149 1150 if (pcbddc->dbg_flag) { 1151 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1152 } 1153 1154 /* max size of subsets */ 1155 mss = 0; 1156 for (i=0;i<sub_schurs->n_subs;i++) { 1157 PetscInt subset_size; 1158 1159 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1160 mss = PetscMax(mss,subset_size); 1161 } 1162 1163 /* min/max and threshold */ 1164 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1165 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1166 nmax = PetscMax(nmin,nmax); 1167 allocated_S_St = PETSC_FALSE; 1168 if (nmin) { 1169 allocated_S_St = PETSC_TRUE; 1170 } 1171 1172 /* allocate lapack workspace */ 1173 cum = cum2 = 0; 1174 maxneigs = 0; 1175 for (i=0;i<sub_schurs->n_subs;i++) { 1176 PetscInt n,subset_size; 1177 1178 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1179 n = PetscMin(subset_size,nmax); 1180 cum += subset_size; 1181 cum2 += subset_size*n; 1182 maxneigs = PetscMax(maxneigs,n); 1183 } 1184 if (mss) { 1185 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1186 PetscBLASInt B_itype = 1; 1187 PetscBLASInt B_N = mss; 1188 PetscReal zero = 0.0; 1189 PetscReal eps = 0.0; /* dlamch? */ 1190 1191 B_lwork = -1; 1192 S = NULL; 1193 St = NULL; 1194 eigs = NULL; 1195 eigv = NULL; 1196 B_iwork = NULL; 1197 B_ifail = NULL; 1198 #if defined(PETSC_USE_COMPLEX) 1199 rwork = NULL; 1200 #endif 1201 thresh = 1.0; 1202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1203 #if defined(PETSC_USE_COMPLEX) 1204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1205 #else 1206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1207 #endif 1208 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1210 } else { 1211 /* TODO */ 1212 } 1213 } else { 1214 lwork = 0; 1215 } 1216 1217 nv = 0; 1218 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1219 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1220 } 1221 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1222 if (allocated_S_St) { 1223 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1224 } 1225 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1226 #if defined(PETSC_USE_COMPLEX) 1227 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1230 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1231 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1232 nv+cum,&pcbddc->adaptive_constraints_idxs, 1233 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1234 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1235 1236 maxneigs = 0; 1237 cum = cumarray = 0; 1238 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1239 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1240 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1241 const PetscInt *idxs; 1242 1243 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1244 for (cum=0;cum<nv;cum++) { 1245 pcbddc->adaptive_constraints_n[cum] = 1; 1246 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1247 pcbddc->adaptive_constraints_data[cum] = 1.0; 1248 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1249 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1250 } 1251 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1252 } 1253 1254 if (mss) { /* multilevel */ 1255 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1256 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1257 } 1258 1259 thresh = pcbddc->adaptive_threshold; 1260 for (i=0;i<sub_schurs->n_subs;i++) { 1261 const PetscInt *idxs; 1262 PetscReal upper,lower; 1263 PetscInt j,subset_size,eigs_start = 0; 1264 PetscBLASInt B_N; 1265 PetscBool same_data = PETSC_FALSE; 1266 1267 if (pcbddc->use_deluxe_scaling) { 1268 upper = PETSC_MAX_REAL; 1269 lower = thresh; 1270 } else { 1271 upper = 1./thresh; 1272 lower = 0.; 1273 } 1274 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1275 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1276 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1277 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1278 if (sub_schurs->is_hermitian) { 1279 PetscInt j,k; 1280 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1281 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1282 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1283 } 1284 for (j=0;j<subset_size;j++) { 1285 for (k=j;k<subset_size;k++) { 1286 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1287 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1288 } 1289 } 1290 } else { 1291 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1292 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1293 } 1294 } else { 1295 S = Sarray + cumarray; 1296 St = Starray + cumarray; 1297 } 1298 /* see if we can save some work */ 1299 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1300 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1301 } 1302 1303 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1304 B_neigs = 0; 1305 } else { 1306 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1307 PetscBLASInt B_itype = 1; 1308 PetscBLASInt B_IL, B_IU; 1309 PetscReal eps = -1.0; /* dlamch? */ 1310 PetscInt nmin_s; 1311 PetscBool compute_range = PETSC_FALSE; 1312 1313 if (pcbddc->dbg_flag) { 1314 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1315 } 1316 1317 compute_range = PETSC_FALSE; 1318 if (thresh > 1.+PETSC_SMALL && !same_data) { 1319 compute_range = PETSC_TRUE; 1320 } 1321 1322 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1323 if (compute_range) { 1324 1325 /* ask for eigenvalues larger than thresh */ 1326 #if defined(PETSC_USE_COMPLEX) 1327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1328 #else 1329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1330 #endif 1331 } else if (!same_data) { 1332 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1333 B_IL = 1; 1334 #if defined(PETSC_USE_COMPLEX) 1335 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1336 #else 1337 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1338 #endif 1339 } else { /* same_data is true, so get the adaptive function requested by the user */ 1340 PetscInt k; 1341 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1342 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1343 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1344 nmin = nmax; 1345 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1346 for (k=0;k<nmax;k++) { 1347 eigs[k] = 1./PETSC_SMALL; 1348 eigv[k*(subset_size+1)] = 1.0; 1349 } 1350 } 1351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1352 if (B_ierr) { 1353 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1354 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1355 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1356 } 1357 1358 if (B_neigs > nmax) { 1359 if (pcbddc->dbg_flag) { 1360 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1361 } 1362 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1363 B_neigs = nmax; 1364 } 1365 1366 nmin_s = PetscMin(nmin,B_N); 1367 if (B_neigs < nmin_s) { 1368 PetscBLASInt B_neigs2; 1369 1370 if (pcbddc->use_deluxe_scaling) { 1371 B_IL = B_N - nmin_s + 1; 1372 B_IU = B_N - B_neigs; 1373 } else { 1374 B_IL = B_neigs + 1; 1375 B_IU = nmin_s; 1376 } 1377 if (pcbddc->dbg_flag) { 1378 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1379 } 1380 if (sub_schurs->is_hermitian) { 1381 PetscInt j; 1382 for (j=0;j<subset_size;j++) { 1383 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1384 } 1385 for (j=0;j<subset_size;j++) { 1386 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1387 } 1388 } else { 1389 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1390 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 } 1392 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1393 #if defined(PETSC_USE_COMPLEX) 1394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1395 #else 1396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1397 #endif 1398 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1399 B_neigs += B_neigs2; 1400 } 1401 if (B_ierr) { 1402 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1403 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1404 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1405 } 1406 if (pcbddc->dbg_flag) { 1407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1408 for (j=0;j<B_neigs;j++) { 1409 if (eigs[j] == 0.0) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1411 } else { 1412 if (pcbddc->use_deluxe_scaling) { 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1416 } 1417 } 1418 } 1419 } 1420 } else { 1421 /* TODO */ 1422 } 1423 } 1424 /* change the basis back to the original one */ 1425 if (sub_schurs->change) { 1426 Mat change,phi,phit; 1427 1428 if (pcbddc->dbg_flag > 1) { 1429 PetscInt ii; 1430 for (ii=0;ii<B_neigs;ii++) { 1431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1432 for (j=0;j<B_N;j++) { 1433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1434 } 1435 } 1436 } 1437 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1438 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1439 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1440 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1441 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1442 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1443 } 1444 maxneigs = PetscMax(B_neigs,maxneigs); 1445 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1446 if (B_neigs) { 1447 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1448 1449 if (pcbddc->dbg_flag > 1) { 1450 PetscInt ii; 1451 for (ii=0;ii<B_neigs;ii++) { 1452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1453 for (j=0;j<B_N;j++) { 1454 #if defined(PETSC_USE_COMPLEX) 1455 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1456 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1458 #else 1459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1460 #endif 1461 } 1462 } 1463 } 1464 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1465 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1466 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1467 cum++; 1468 } 1469 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1470 /* shift for next computation */ 1471 cumarray += subset_size*subset_size; 1472 } 1473 if (pcbddc->dbg_flag) { 1474 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1475 } 1476 1477 if (mss) { 1478 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1479 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1480 /* destroy matrices (junk) */ 1481 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1482 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1483 } 1484 if (allocated_S_St) { 1485 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1486 } 1487 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1488 #if defined(PETSC_USE_COMPLEX) 1489 ierr = PetscFree(rwork);CHKERRQ(ierr); 1490 #endif 1491 if (pcbddc->dbg_flag) { 1492 PetscInt maxneigs_r; 1493 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "PCBDDCSetUpSolvers" 1501 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1502 { 1503 PetscScalar *coarse_submat_vals; 1504 PetscErrorCode ierr; 1505 1506 PetscFunctionBegin; 1507 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1508 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1509 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1510 1511 /* Setup local neumann solver ksp_R */ 1512 /* PCBDDCSetUpLocalScatters should be called first! */ 1513 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1514 1515 /* 1516 Setup local correction and local part of coarse basis. 1517 Gives back the dense local part of the coarse matrix in column major ordering 1518 */ 1519 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1520 1521 /* Compute total number of coarse nodes and setup coarse solver */ 1522 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1523 1524 /* free */ 1525 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "PCBDDCResetCustomization" 1531 PetscErrorCode PCBDDCResetCustomization(PC pc) 1532 { 1533 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1538 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1539 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1540 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1541 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1542 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1543 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1544 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1545 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1546 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1547 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "PCBDDCResetTopography" 1553 PetscErrorCode PCBDDCResetTopography(PC pc) 1554 { 1555 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1556 PetscInt i; 1557 PetscErrorCode ierr; 1558 1559 PetscFunctionBegin; 1560 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1561 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1562 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1563 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1564 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1565 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1566 for (i=0;i<pcbddc->n_local_subs;i++) { 1567 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1568 } 1569 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1570 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "PCBDDCResetSolvers" 1576 PetscErrorCode PCBDDCResetSolvers(PC pc) 1577 { 1578 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1583 if (pcbddc->coarse_phi_B) { 1584 PetscScalar *array; 1585 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1586 ierr = PetscFree(array);CHKERRQ(ierr); 1587 } 1588 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1589 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1590 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1591 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1592 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1593 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1595 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1596 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1597 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1598 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1599 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1600 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1601 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1602 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1603 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1604 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1605 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1606 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1607 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1608 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1609 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1610 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1611 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1612 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1613 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1614 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1615 if (pcbddc->benign_zerodiag_subs) { 1616 PetscInt i; 1617 for (i=0;i<pcbddc->benign_n;i++) { 1618 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1619 } 1620 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1621 } 1622 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1628 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1629 { 1630 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1631 PC_IS *pcis = (PC_IS*)pc->data; 1632 VecType impVecType; 1633 PetscInt n_constraints,n_R,old_size; 1634 PetscErrorCode ierr; 1635 1636 PetscFunctionBegin; 1637 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1638 /* get sizes */ 1639 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1640 n_R = pcis->n - pcbddc->n_vertices; 1641 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1642 /* local work vectors (try to avoid unneeded work)*/ 1643 /* R nodes */ 1644 old_size = -1; 1645 if (pcbddc->vec1_R) { 1646 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1647 } 1648 if (n_R != old_size) { 1649 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1650 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1651 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1652 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1653 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1654 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1655 } 1656 /* local primal dofs */ 1657 old_size = -1; 1658 if (pcbddc->vec1_P) { 1659 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1660 } 1661 if (pcbddc->local_primal_size != old_size) { 1662 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1663 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1664 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1665 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1666 } 1667 /* local explicit constraints */ 1668 old_size = -1; 1669 if (pcbddc->vec1_C) { 1670 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1671 } 1672 if (n_constraints && n_constraints != old_size) { 1673 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1674 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1675 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1676 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "PCBDDCSetUpCorrection" 1683 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1684 { 1685 PetscErrorCode ierr; 1686 /* pointers to pcis and pcbddc */ 1687 PC_IS* pcis = (PC_IS*)pc->data; 1688 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1689 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1690 /* submatrices of local problem */ 1691 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1692 /* submatrices of local coarse problem */ 1693 Mat S_VV,S_CV,S_VC,S_CC; 1694 /* working matrices */ 1695 Mat C_CR; 1696 /* additional working stuff */ 1697 PC pc_R; 1698 Mat F; 1699 Vec dummy_vec; 1700 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1701 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1702 PetscScalar *work; 1703 PetscInt *idx_V_B; 1704 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1705 PetscInt i,n_R,n_D,n_B; 1706 1707 /* some shortcuts to scalars */ 1708 PetscScalar one=1.0,m_one=-1.0; 1709 1710 PetscFunctionBegin; 1711 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1712 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1713 } 1714 1715 /* Set Non-overlapping dimensions */ 1716 n_vertices = pcbddc->n_vertices; 1717 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1718 n_B = pcis->n_B; 1719 n_D = pcis->n - n_B; 1720 n_R = pcis->n - n_vertices; 1721 1722 /* vertices in boundary numbering */ 1723 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1724 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1725 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1726 1727 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1728 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1729 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1730 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1731 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1732 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1733 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1734 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1735 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1736 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1737 1738 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1739 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1740 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1741 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1742 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1743 lda_rhs = n_R; 1744 need_benign_correction = PETSC_FALSE; 1745 if (isLU || isILU || isCHOL) { 1746 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1747 } else if (sub_schurs->reuse_solver) { 1748 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1749 MatFactorType type; 1750 1751 F = reuse_solver->F; 1752 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1753 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1754 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1755 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1756 } else { 1757 F = NULL; 1758 } 1759 1760 /* allocate workspace */ 1761 n = 0; 1762 if (n_constraints) { 1763 n += lda_rhs*n_constraints; 1764 } 1765 if (n_vertices) { 1766 n = PetscMax(2*lda_rhs*n_vertices,n); 1767 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1768 } 1769 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1770 1771 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1772 dummy_vec = NULL; 1773 if (need_benign_correction && lda_rhs != n_R && F) { 1774 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1775 } 1776 1777 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1778 if (n_constraints) { 1779 Mat M1,M2,M3,C_B; 1780 IS is_aux; 1781 PetscScalar *array,*array2; 1782 1783 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1784 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1785 1786 /* Extract constraints on R nodes: C_{CR} */ 1787 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1788 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1789 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1790 1791 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1792 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1793 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1794 for (i=0;i<n_constraints;i++) { 1795 const PetscScalar *row_cmat_values; 1796 const PetscInt *row_cmat_indices; 1797 PetscInt size_of_constraint,j; 1798 1799 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1800 for (j=0;j<size_of_constraint;j++) { 1801 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1802 } 1803 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1804 } 1805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1806 if (F) { 1807 Mat B; 1808 1809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1810 if (need_benign_correction) { 1811 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1812 1813 /* rhs is already zero on interior dofs, no need to change the rhs */ 1814 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1815 } 1816 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1817 if (need_benign_correction) { 1818 PetscScalar *marr; 1819 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1820 1821 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1822 if (lda_rhs != n_R) { 1823 for (i=0;i<n_constraints;i++) { 1824 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1825 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1826 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1827 } 1828 } else { 1829 for (i=0;i<n_constraints;i++) { 1830 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1831 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1832 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1833 } 1834 } 1835 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1836 } 1837 ierr = MatDestroy(&B);CHKERRQ(ierr); 1838 } else { 1839 PetscScalar *marr; 1840 1841 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1842 for (i=0;i<n_constraints;i++) { 1843 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1844 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1845 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1846 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1847 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1848 } 1849 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1850 } 1851 if (!pcbddc->switch_static) { 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1853 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1854 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1855 for (i=0;i<n_constraints;i++) { 1856 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1857 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1860 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1861 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1862 } 1863 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1864 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1865 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1866 } else { 1867 if (lda_rhs != n_R) { 1868 IS dummy; 1869 1870 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1871 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1872 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1873 } else { 1874 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1875 pcbddc->local_auxmat2 = local_auxmat2_R; 1876 } 1877 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1878 } 1879 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1880 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1881 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1882 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1883 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1884 if (isCHOL) { 1885 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1888 } 1889 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1890 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1891 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1892 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1893 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1894 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1895 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1896 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1897 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1898 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1899 } 1900 1901 /* Get submatrices from subdomain matrix */ 1902 if (n_vertices) { 1903 IS is_aux; 1904 1905 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1906 IS tis; 1907 1908 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1909 ierr = ISSort(tis);CHKERRQ(ierr); 1910 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1911 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1912 } else { 1913 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1914 } 1915 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1916 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1917 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1918 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1919 } 1920 1921 /* Matrix of coarse basis functions (local) */ 1922 if (pcbddc->coarse_phi_B) { 1923 PetscInt on_B,on_primal,on_D=n_D; 1924 if (pcbddc->coarse_phi_D) { 1925 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1926 } 1927 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1928 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1929 PetscScalar *marray; 1930 1931 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1932 ierr = PetscFree(marray);CHKERRQ(ierr); 1933 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1934 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1935 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1936 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1937 } 1938 } 1939 1940 if (!pcbddc->coarse_phi_B) { 1941 PetscScalar *marray; 1942 1943 n = n_B*pcbddc->local_primal_size; 1944 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1945 n += n_D*pcbddc->local_primal_size; 1946 } 1947 if (!pcbddc->symmetric_primal) { 1948 n *= 2; 1949 } 1950 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1951 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1952 n = n_B*pcbddc->local_primal_size; 1953 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1955 n += n_D*pcbddc->local_primal_size; 1956 } 1957 if (!pcbddc->symmetric_primal) { 1958 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1959 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1960 n = n_B*pcbddc->local_primal_size; 1961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1962 } 1963 } else { 1964 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1965 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1966 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1967 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1968 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1969 } 1970 } 1971 } 1972 1973 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1974 p0_lidx_I = NULL; 1975 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1976 const PetscInt *idxs; 1977 1978 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1979 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1980 for (i=0;i<pcbddc->benign_n;i++) { 1981 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1982 } 1983 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1984 } 1985 1986 /* vertices */ 1987 if (n_vertices) { 1988 1989 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1990 1991 if (n_R) { 1992 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1993 PetscBLASInt B_N,B_one = 1; 1994 PetscScalar *x,*y; 1995 PetscBool isseqaij; 1996 1997 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1998 if (need_benign_correction) { 1999 ISLocalToGlobalMapping RtoN; 2000 IS is_p0; 2001 PetscInt *idxs_p0,n; 2002 2003 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2004 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2005 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2006 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2007 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2008 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2009 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2010 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2011 } 2012 2013 if (lda_rhs == n_R) { 2014 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2015 } else { 2016 PetscScalar *av,*array; 2017 const PetscInt *xadj,*adjncy; 2018 PetscInt n; 2019 PetscBool flg_row; 2020 2021 array = work+lda_rhs*n_vertices; 2022 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2023 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2024 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2025 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2026 for (i=0;i<n;i++) { 2027 PetscInt j; 2028 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2029 } 2030 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2031 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2032 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2033 } 2034 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2035 if (need_benign_correction) { 2036 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2037 PetscScalar *marr; 2038 2039 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2040 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2041 2042 | 0 0 0 | (V) 2043 L = | 0 0 -1 | (P-p0) 2044 | 0 0 -1 | (p0) 2045 2046 */ 2047 for (i=0;i<reuse_solver->benign_n;i++) { 2048 const PetscScalar *vals; 2049 const PetscInt *idxs,*idxs_zero; 2050 PetscInt n,j,nz; 2051 2052 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2053 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2054 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2055 for (j=0;j<n;j++) { 2056 PetscScalar val = vals[j]; 2057 PetscInt k,col = idxs[j]; 2058 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2059 } 2060 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2061 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2062 } 2063 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2064 } 2065 if (F) { 2066 /* need to correct the rhs */ 2067 if (need_benign_correction) { 2068 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2069 PetscScalar *marr; 2070 2071 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2072 if (lda_rhs != n_R) { 2073 for (i=0;i<n_vertices;i++) { 2074 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2075 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2076 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2077 } 2078 } else { 2079 for (i=0;i<n_vertices;i++) { 2080 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2081 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2083 } 2084 } 2085 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2086 } 2087 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2088 /* need to correct the solution */ 2089 if (need_benign_correction) { 2090 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2091 PetscScalar *marr; 2092 2093 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2094 if (lda_rhs != n_R) { 2095 for (i=0;i<n_vertices;i++) { 2096 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2097 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2098 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2099 } 2100 } else { 2101 for (i=0;i<n_vertices;i++) { 2102 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2103 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2104 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2105 } 2106 } 2107 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2108 } 2109 } else { 2110 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2111 for (i=0;i<n_vertices;i++) { 2112 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2113 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2114 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2115 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2116 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2117 } 2118 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2119 } 2120 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2121 /* S_VV and S_CV */ 2122 if (n_constraints) { 2123 Mat B; 2124 2125 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2126 for (i=0;i<n_vertices;i++) { 2127 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2128 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2129 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2130 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2132 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2133 } 2134 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2135 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2136 ierr = MatDestroy(&B);CHKERRQ(ierr); 2137 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2138 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2139 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2140 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2142 ierr = MatDestroy(&B);CHKERRQ(ierr); 2143 } 2144 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2145 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2146 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2147 } 2148 if (lda_rhs != n_R) { 2149 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2151 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2152 } 2153 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2154 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2155 if (need_benign_correction) { 2156 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2157 PetscScalar *marr,*sums; 2158 2159 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2160 ierr = MatDenseGetArray(S_VVt,&marr); 2161 for (i=0;i<reuse_solver->benign_n;i++) { 2162 const PetscScalar *vals; 2163 const PetscInt *idxs,*idxs_zero; 2164 PetscInt n,j,nz; 2165 2166 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2167 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2168 for (j=0;j<n_vertices;j++) { 2169 PetscInt k; 2170 sums[j] = 0.; 2171 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2172 } 2173 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2174 for (j=0;j<n;j++) { 2175 PetscScalar val = vals[j]; 2176 PetscInt k; 2177 for (k=0;k<n_vertices;k++) { 2178 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2179 } 2180 } 2181 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2182 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2183 } 2184 ierr = PetscFree(sums);CHKERRQ(ierr); 2185 ierr = MatDenseRestoreArray(S_VVt,&marr); 2186 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2187 } 2188 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2189 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2190 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2191 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2192 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2193 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2194 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2195 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2196 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2199 } 2200 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2201 2202 /* coarse basis functions */ 2203 for (i=0;i<n_vertices;i++) { 2204 PetscScalar *y; 2205 2206 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2207 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2208 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2209 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2210 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2211 y[n_B*i+idx_V_B[i]] = 1.0; 2212 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2213 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2214 2215 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2216 PetscInt j; 2217 2218 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2219 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2220 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2221 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2222 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2223 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2224 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2225 } 2226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2227 } 2228 /* if n_R == 0 the object is not destroyed */ 2229 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2230 } 2231 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2232 2233 if (n_constraints) { 2234 Mat B; 2235 2236 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2237 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2238 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2239 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2240 if (n_vertices) { 2241 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2242 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2243 } else { 2244 Mat S_VCt; 2245 2246 if (lda_rhs != n_R) { 2247 ierr = MatDestroy(&B);CHKERRQ(ierr); 2248 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2249 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2250 } 2251 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2252 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2253 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2254 } 2255 } 2256 ierr = MatDestroy(&B);CHKERRQ(ierr); 2257 /* coarse basis functions */ 2258 for (i=0;i<n_constraints;i++) { 2259 PetscScalar *y; 2260 2261 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2262 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2263 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2264 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2265 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2266 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2267 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2268 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2269 PetscInt j; 2270 2271 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2272 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2273 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2274 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2275 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2276 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2277 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2278 } 2279 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2280 } 2281 } 2282 if (n_constraints) { 2283 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2284 } 2285 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2286 2287 /* coarse matrix entries relative to B_0 */ 2288 if (pcbddc->benign_n) { 2289 Mat B0_B,B0_BPHI; 2290 IS is_dummy; 2291 PetscScalar *data; 2292 PetscInt j; 2293 2294 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2295 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2296 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2297 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2298 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2299 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2300 for (j=0;j<pcbddc->benign_n;j++) { 2301 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2302 for (i=0;i<pcbddc->local_primal_size;i++) { 2303 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2304 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2305 } 2306 } 2307 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2308 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2309 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2310 } 2311 2312 /* compute other basis functions for non-symmetric problems */ 2313 if (!pcbddc->symmetric_primal) { 2314 Mat B_V=NULL,B_C=NULL; 2315 PetscScalar *marray; 2316 2317 if (n_constraints) { 2318 Mat S_CCT,C_CRT; 2319 2320 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2321 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2322 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2323 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2324 if (n_vertices) { 2325 Mat S_VCT; 2326 2327 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2328 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2329 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2330 } 2331 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2332 } 2333 if (n_vertices && n_R) { 2334 PetscScalar *av,*marray; 2335 const PetscInt *xadj,*adjncy; 2336 PetscInt n; 2337 PetscBool flg_row; 2338 2339 /* B_V = B_V - A_VR^T */ 2340 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2341 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2342 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2343 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2344 for (i=0;i<n;i++) { 2345 PetscInt j; 2346 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2347 } 2348 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2349 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2350 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2351 } 2352 2353 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2354 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2355 for (i=0;i<n_vertices;i++) { 2356 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2357 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2358 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2359 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2360 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2361 } 2362 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2363 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2364 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2365 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2366 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2367 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2368 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2369 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2370 } 2371 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2372 /* coarse basis functions */ 2373 for (i=0;i<pcbddc->local_primal_size;i++) { 2374 PetscScalar *y; 2375 2376 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2377 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2378 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2379 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2380 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2381 if (i<n_vertices) { 2382 y[n_B*i+idx_V_B[i]] = 1.0; 2383 } 2384 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2385 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2386 2387 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2388 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2389 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2390 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2391 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2392 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2393 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2394 } 2395 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2396 } 2397 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2398 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2399 } 2400 /* free memory */ 2401 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2402 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2403 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2404 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2405 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2406 ierr = PetscFree(work);CHKERRQ(ierr); 2407 if (n_vertices) { 2408 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2409 } 2410 if (n_constraints) { 2411 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2412 } 2413 /* Checking coarse_sub_mat and coarse basis functios */ 2414 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2415 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2416 if (pcbddc->dbg_flag) { 2417 Mat coarse_sub_mat; 2418 Mat AUXMAT,TM1,TM2,TM3,TM4; 2419 Mat coarse_phi_D,coarse_phi_B; 2420 Mat coarse_psi_D,coarse_psi_B; 2421 Mat A_II,A_BB,A_IB,A_BI; 2422 Mat C_B,CPHI; 2423 IS is_dummy; 2424 Vec mones; 2425 MatType checkmattype=MATSEQAIJ; 2426 PetscReal real_value; 2427 2428 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2429 Mat A; 2430 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2431 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2432 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2433 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2434 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2435 ierr = MatDestroy(&A);CHKERRQ(ierr); 2436 } else { 2437 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2438 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2439 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2440 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2441 } 2442 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2443 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2444 if (!pcbddc->symmetric_primal) { 2445 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2446 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2447 } 2448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2449 2450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2452 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2453 if (!pcbddc->symmetric_primal) { 2454 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2455 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2456 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2457 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2458 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2460 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2461 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2463 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2464 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2465 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2466 } else { 2467 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2468 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2469 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2470 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2471 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2472 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2473 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2475 } 2476 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2477 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2478 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2479 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2480 if (pcbddc->benign_n) { 2481 Mat B0_B,B0_BPHI; 2482 PetscScalar *data,*data2; 2483 PetscInt j; 2484 2485 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2486 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2487 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2488 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2489 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2490 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2491 for (j=0;j<pcbddc->benign_n;j++) { 2492 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2493 for (i=0;i<pcbddc->local_primal_size;i++) { 2494 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2495 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2496 } 2497 } 2498 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2499 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2500 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2501 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2502 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2503 } 2504 #if 0 2505 { 2506 PetscViewer viewer; 2507 char filename[256]; 2508 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2509 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2510 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2511 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2512 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2513 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2514 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2515 if (save_change) { 2516 Mat phi_B; 2517 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2518 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2519 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2520 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2521 } else { 2522 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2523 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2524 } 2525 if (pcbddc->coarse_phi_D) { 2526 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2527 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2528 } 2529 if (pcbddc->coarse_psi_B) { 2530 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2531 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2532 } 2533 if (pcbddc->coarse_psi_D) { 2534 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2535 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2536 } 2537 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2538 } 2539 #endif 2540 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2541 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2542 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2544 2545 /* check constraints */ 2546 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2547 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2548 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2549 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2550 } else { 2551 PetscScalar *data; 2552 Mat tmat; 2553 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2554 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2555 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2556 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2557 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2558 } 2559 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2560 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2561 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2562 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2564 if (!pcbddc->symmetric_primal) { 2565 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2566 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2567 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2568 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2570 } 2571 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2572 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2573 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2574 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2576 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2577 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2578 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2579 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2580 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2581 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2582 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2583 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2584 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2585 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2586 if (!pcbddc->symmetric_primal) { 2587 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2588 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2589 } 2590 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2591 } 2592 /* get back data */ 2593 *coarse_submat_vals_n = coarse_submat_vals; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2599 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2600 { 2601 Mat *work_mat; 2602 IS isrow_s,iscol_s; 2603 PetscBool rsorted,csorted; 2604 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2605 PetscErrorCode ierr; 2606 2607 PetscFunctionBegin; 2608 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2609 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2610 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2612 2613 if (!rsorted) { 2614 const PetscInt *idxs; 2615 PetscInt *idxs_sorted,i; 2616 2617 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2618 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2619 for (i=0;i<rsize;i++) { 2620 idxs_perm_r[i] = i; 2621 } 2622 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2623 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2624 for (i=0;i<rsize;i++) { 2625 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2626 } 2627 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2628 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2629 } else { 2630 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2631 isrow_s = isrow; 2632 } 2633 2634 if (!csorted) { 2635 if (isrow == iscol) { 2636 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2637 iscol_s = isrow_s; 2638 } else { 2639 const PetscInt *idxs; 2640 PetscInt *idxs_sorted,i; 2641 2642 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2643 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2644 for (i=0;i<csize;i++) { 2645 idxs_perm_c[i] = i; 2646 } 2647 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2648 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2649 for (i=0;i<csize;i++) { 2650 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2651 } 2652 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2653 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2654 } 2655 } else { 2656 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2657 iscol_s = iscol; 2658 } 2659 2660 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2661 2662 if (!rsorted || !csorted) { 2663 Mat new_mat; 2664 IS is_perm_r,is_perm_c; 2665 2666 if (!rsorted) { 2667 PetscInt *idxs_r,i; 2668 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2669 for (i=0;i<rsize;i++) { 2670 idxs_r[idxs_perm_r[i]] = i; 2671 } 2672 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2673 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2674 } else { 2675 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2676 } 2677 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2678 2679 if (!csorted) { 2680 if (isrow_s == iscol_s) { 2681 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2682 is_perm_c = is_perm_r; 2683 } else { 2684 PetscInt *idxs_c,i; 2685 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2686 for (i=0;i<csize;i++) { 2687 idxs_c[idxs_perm_c[i]] = i; 2688 } 2689 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2690 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2691 } 2692 } else { 2693 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2694 } 2695 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2696 2697 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2698 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2699 work_mat[0] = new_mat; 2700 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2701 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2702 } 2703 2704 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2705 *B = work_mat[0]; 2706 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2707 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2708 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2714 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2715 { 2716 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2717 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2718 Mat new_mat; 2719 IS is_local,is_global; 2720 PetscInt local_size; 2721 PetscBool isseqaij; 2722 PetscErrorCode ierr; 2723 2724 PetscFunctionBegin; 2725 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2726 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2727 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2728 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2729 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2730 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2731 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2732 2733 /* check */ 2734 if (pcbddc->dbg_flag) { 2735 Vec x,x_change; 2736 PetscReal error; 2737 2738 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2739 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2740 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2742 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2743 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2744 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2745 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2746 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2747 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2748 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2749 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2750 ierr = VecDestroy(&x);CHKERRQ(ierr); 2751 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2752 } 2753 2754 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2755 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2756 if (isseqaij) { 2757 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2758 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2759 } else { 2760 Mat work_mat; 2761 2762 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2763 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2764 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2765 } 2766 if (matis->A->symmetric_set) { 2767 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2768 #if !defined(PETSC_USE_COMPLEX) 2769 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2770 #endif 2771 } 2772 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2778 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2779 { 2780 PC_IS* pcis = (PC_IS*)(pc->data); 2781 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2782 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2783 PetscInt *idx_R_local=NULL; 2784 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2785 PetscInt vbs,bs; 2786 PetscBT bitmask=NULL; 2787 PetscErrorCode ierr; 2788 2789 PetscFunctionBegin; 2790 /* 2791 No need to setup local scatters if 2792 - primal space is unchanged 2793 AND 2794 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2795 AND 2796 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2797 */ 2798 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2799 PetscFunctionReturn(0); 2800 } 2801 /* destroy old objects */ 2802 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2803 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2804 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2805 /* Set Non-overlapping dimensions */ 2806 n_B = pcis->n_B; 2807 n_D = pcis->n - n_B; 2808 n_vertices = pcbddc->n_vertices; 2809 2810 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2811 2812 /* create auxiliary bitmask and allocate workspace */ 2813 if (!sub_schurs->reuse_solver) { 2814 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2815 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2816 for (i=0;i<n_vertices;i++) { 2817 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2818 } 2819 2820 for (i=0, n_R=0; i<pcis->n; i++) { 2821 if (!PetscBTLookup(bitmask,i)) { 2822 idx_R_local[n_R++] = i; 2823 } 2824 } 2825 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2826 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2827 2828 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2829 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2830 } 2831 2832 /* Block code */ 2833 vbs = 1; 2834 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2835 if (bs>1 && !(n_vertices%bs)) { 2836 PetscBool is_blocked = PETSC_TRUE; 2837 PetscInt *vary; 2838 if (!sub_schurs->reuse_solver) { 2839 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2840 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2841 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2842 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2843 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2844 for (i=0; i<pcis->n/bs; i++) { 2845 if (vary[i]!=0 && vary[i]!=bs) { 2846 is_blocked = PETSC_FALSE; 2847 break; 2848 } 2849 } 2850 ierr = PetscFree(vary);CHKERRQ(ierr); 2851 } else { 2852 /* Verify directly the R set */ 2853 for (i=0; i<n_R/bs; i++) { 2854 PetscInt j,node=idx_R_local[bs*i]; 2855 for (j=1; j<bs; j++) { 2856 if (node != idx_R_local[bs*i+j]-j) { 2857 is_blocked = PETSC_FALSE; 2858 break; 2859 } 2860 } 2861 } 2862 } 2863 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2864 vbs = bs; 2865 for (i=0;i<n_R/vbs;i++) { 2866 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2867 } 2868 } 2869 } 2870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2871 if (sub_schurs->reuse_solver) { 2872 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2873 2874 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2875 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2876 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2877 reuse_solver->is_R = pcbddc->is_R_local; 2878 } else { 2879 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2880 } 2881 2882 /* print some info if requested */ 2883 if (pcbddc->dbg_flag) { 2884 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2886 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2890 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2891 } 2892 2893 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2894 if (!sub_schurs->reuse_solver) { 2895 IS is_aux1,is_aux2; 2896 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2897 2898 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2899 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2900 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2901 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2902 for (i=0; i<n_D; i++) { 2903 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2904 } 2905 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2906 for (i=0, j=0; i<n_R; i++) { 2907 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2908 aux_array1[j++] = i; 2909 } 2910 } 2911 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2912 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2913 for (i=0, j=0; i<n_B; i++) { 2914 if (!PetscBTLookup(bitmask,is_indices[i])) { 2915 aux_array2[j++] = i; 2916 } 2917 } 2918 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2919 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2920 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2921 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2922 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2923 2924 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2925 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2926 for (i=0, j=0; i<n_R; i++) { 2927 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2928 aux_array1[j++] = i; 2929 } 2930 } 2931 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2932 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2934 } 2935 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2936 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2937 } else { 2938 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2939 IS tis; 2940 PetscInt schur_size; 2941 2942 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2943 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2944 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2945 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2946 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2947 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2948 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2949 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2950 } 2951 } 2952 PetscFunctionReturn(0); 2953 } 2954 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2958 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2959 { 2960 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2961 PC_IS *pcis = (PC_IS*)pc->data; 2962 PC pc_temp; 2963 Mat A_RR; 2964 MatReuse reuse; 2965 PetscScalar m_one = -1.0; 2966 PetscReal value; 2967 PetscInt n_D,n_R; 2968 PetscBool check_corr[2],issbaij; 2969 PetscErrorCode ierr; 2970 /* prefixes stuff */ 2971 char dir_prefix[256],neu_prefix[256],str_level[16]; 2972 size_t len; 2973 2974 PetscFunctionBegin; 2975 2976 /* compute prefixes */ 2977 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2978 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2979 if (!pcbddc->current_level) { 2980 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2981 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2982 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2983 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2984 } else { 2985 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2986 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2987 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2988 len -= 15; /* remove "pc_bddc_coarse_" */ 2989 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2990 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2991 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2992 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2993 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2994 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2995 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2996 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2997 } 2998 2999 /* DIRICHLET PROBLEM */ 3000 if (dirichlet) { 3001 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3002 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3003 if (!sub_schurs->reuse_solver) { 3004 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3005 } 3006 if (pcbddc->dbg_flag) { 3007 Mat A_IIn; 3008 3009 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3010 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3011 pcis->A_II = A_IIn; 3012 } 3013 } 3014 if (pcbddc->local_mat->symmetric_set) { 3015 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3016 } 3017 /* Matrix for Dirichlet problem is pcis->A_II */ 3018 n_D = pcis->n - pcis->n_B; 3019 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3020 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3021 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3022 /* default */ 3023 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3024 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3025 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3026 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3027 if (issbaij) { 3028 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3029 } else { 3030 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3031 } 3032 /* Allow user's customization */ 3033 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3034 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3035 } 3036 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3037 if (sub_schurs->reuse_solver) { 3038 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3039 3040 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3041 } 3042 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3043 if (!n_D) { 3044 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3045 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3046 } 3047 /* Set Up KSP for Dirichlet problem of BDDC */ 3048 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3049 /* set ksp_D into pcis data */ 3050 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3051 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3052 pcis->ksp_D = pcbddc->ksp_D; 3053 } 3054 3055 /* NEUMANN PROBLEM */ 3056 A_RR = 0; 3057 if (neumann) { 3058 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3059 PetscInt ibs,mbs; 3060 PetscBool issbaij; 3061 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3062 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3063 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3064 if (pcbddc->ksp_R) { /* already created ksp */ 3065 PetscInt nn_R; 3066 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3067 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3068 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3069 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3070 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3071 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3072 reuse = MAT_INITIAL_MATRIX; 3073 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3074 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3075 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3076 reuse = MAT_INITIAL_MATRIX; 3077 } else { /* safe to reuse the matrix */ 3078 reuse = MAT_REUSE_MATRIX; 3079 } 3080 } 3081 /* last check */ 3082 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3083 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3084 reuse = MAT_INITIAL_MATRIX; 3085 } 3086 } else { /* first time, so we need to create the matrix */ 3087 reuse = MAT_INITIAL_MATRIX; 3088 } 3089 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3090 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3091 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3092 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3093 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3094 if (matis->A == pcbddc->local_mat) { 3095 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3096 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3097 } else { 3098 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3099 } 3100 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3101 if (matis->A == pcbddc->local_mat) { 3102 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3103 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3104 } else { 3105 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3106 } 3107 } 3108 /* extract A_RR */ 3109 if (sub_schurs->reuse_solver) { 3110 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3111 3112 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3113 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3114 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3115 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3116 } else { 3117 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3118 } 3119 } else { 3120 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3121 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3122 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3123 } 3124 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3125 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3126 } 3127 if (pcbddc->local_mat->symmetric_set) { 3128 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3129 } 3130 if (!pcbddc->ksp_R) { /* create object if not present */ 3131 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3132 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3133 /* default */ 3134 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3135 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3136 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3137 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3138 if (issbaij) { 3139 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3140 } else { 3141 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3142 } 3143 /* Allow user's customization */ 3144 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3145 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3146 } 3147 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3148 if (!n_R) { 3149 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3150 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3151 } 3152 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3153 /* Reuse solver if it is present */ 3154 if (sub_schurs->reuse_solver) { 3155 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3156 3157 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3158 } 3159 /* Set Up KSP for Neumann problem of BDDC */ 3160 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3161 } 3162 3163 if (pcbddc->dbg_flag) { 3164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3165 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3167 } 3168 3169 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3170 check_corr[0] = check_corr[1] = PETSC_FALSE; 3171 if (pcbddc->NullSpace_corr[0]) { 3172 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3173 } 3174 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3175 check_corr[0] = PETSC_TRUE; 3176 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3177 } 3178 if (neumann && pcbddc->NullSpace_corr[2]) { 3179 check_corr[1] = PETSC_TRUE; 3180 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3181 } 3182 3183 /* check Dirichlet and Neumann solvers */ 3184 if (pcbddc->dbg_flag) { 3185 if (dirichlet) { /* Dirichlet */ 3186 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3187 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3188 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3189 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3190 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3191 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3192 if (check_corr[0]) { 3193 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3194 } 3195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3196 } 3197 if (neumann) { /* Neumann */ 3198 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3199 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3200 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3201 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3202 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3203 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3204 if (check_corr[1]) { 3205 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3206 } 3207 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3208 } 3209 } 3210 /* free Neumann problem's matrix */ 3211 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3217 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3218 { 3219 PetscErrorCode ierr; 3220 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3221 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3222 3223 PetscFunctionBegin; 3224 if (!sub_schurs->reuse_solver) { 3225 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3226 } 3227 if (!pcbddc->switch_static) { 3228 if (applytranspose && pcbddc->local_auxmat1) { 3229 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3230 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3231 } 3232 if (!sub_schurs->reuse_solver) { 3233 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3234 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3235 } else { 3236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3237 3238 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3239 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3240 } 3241 } else { 3242 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3244 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3246 if (applytranspose && pcbddc->local_auxmat1) { 3247 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3248 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3249 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3251 } 3252 } 3253 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3254 if (applytranspose) { 3255 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3256 } else { 3257 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3258 } 3259 } else { 3260 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3261 3262 if (applytranspose) { 3263 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3264 } else { 3265 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3266 } 3267 } 3268 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3269 if (!pcbddc->switch_static) { 3270 if (!sub_schurs->reuse_solver) { 3271 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3272 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3273 } else { 3274 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3275 3276 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3277 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3278 } 3279 if (!applytranspose && pcbddc->local_auxmat1) { 3280 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3281 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3282 } 3283 } else { 3284 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3285 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3286 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3287 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3288 if (!applytranspose && pcbddc->local_auxmat1) { 3289 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3290 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3291 } 3292 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3294 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3295 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3296 } 3297 PetscFunctionReturn(0); 3298 } 3299 3300 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3301 #undef __FUNCT__ 3302 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3303 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3304 { 3305 PetscErrorCode ierr; 3306 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3307 PC_IS* pcis = (PC_IS*) (pc->data); 3308 const PetscScalar zero = 0.0; 3309 3310 PetscFunctionBegin; 3311 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3312 if (!pcbddc->benign_apply_coarse_only) { 3313 if (applytranspose) { 3314 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3315 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3316 } else { 3317 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3318 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3319 } 3320 } else { 3321 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3322 } 3323 3324 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3325 if (pcbddc->benign_n) { 3326 PetscScalar *array; 3327 PetscInt j; 3328 3329 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3330 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3331 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3332 } 3333 3334 /* start communications from local primal nodes to rhs of coarse solver */ 3335 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3336 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3337 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3338 3339 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3340 /* TODO remove null space when doing multilevel */ 3341 if (pcbddc->coarse_ksp) { 3342 Mat coarse_mat; 3343 Vec rhs,sol; 3344 MatNullSpace nullsp; 3345 PetscBool isbddc = PETSC_FALSE; 3346 3347 if (pcbddc->benign_have_null) { 3348 PC coarse_pc; 3349 3350 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3351 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3352 /* we need to propagate to coarser levels the need for a possible benign correction */ 3353 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3354 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3355 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3356 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3357 } 3358 } 3359 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3360 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3361 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3362 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3363 if (nullsp) { 3364 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3365 } 3366 if (applytranspose) { 3367 if (pcbddc->benign_apply_coarse_only) { 3368 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3369 } else { 3370 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3371 } 3372 } else { 3373 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3374 PC coarse_pc; 3375 3376 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3377 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3378 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3379 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3380 } else { 3381 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3382 } 3383 } 3384 /* we don't the benign correction at coarser levels anymore */ 3385 if (pcbddc->benign_have_null && isbddc) { 3386 PC coarse_pc; 3387 PC_BDDC* coarsepcbddc; 3388 3389 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3390 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3391 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3392 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3393 } 3394 if (nullsp) { 3395 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3396 } 3397 } 3398 3399 /* Local solution on R nodes */ 3400 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3401 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3402 } 3403 /* communications from coarse sol to local primal nodes */ 3404 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3405 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3406 3407 /* Sum contributions from the two levels */ 3408 if (!pcbddc->benign_apply_coarse_only) { 3409 if (applytranspose) { 3410 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3411 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3412 } else { 3413 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3414 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3415 } 3416 /* store p0 */ 3417 if (pcbddc->benign_n) { 3418 PetscScalar *array; 3419 PetscInt j; 3420 3421 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3422 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3423 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3424 } 3425 } else { /* expand the coarse solution */ 3426 if (applytranspose) { 3427 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3428 } else { 3429 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3430 } 3431 } 3432 PetscFunctionReturn(0); 3433 } 3434 3435 #undef __FUNCT__ 3436 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3437 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3438 { 3439 PetscErrorCode ierr; 3440 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3441 PetscScalar *array; 3442 Vec from,to; 3443 3444 PetscFunctionBegin; 3445 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3446 from = pcbddc->coarse_vec; 3447 to = pcbddc->vec1_P; 3448 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3449 Vec tvec; 3450 3451 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3452 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3453 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3454 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3455 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3456 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3457 } 3458 } else { /* from local to global -> put data in coarse right hand side */ 3459 from = pcbddc->vec1_P; 3460 to = pcbddc->coarse_vec; 3461 } 3462 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3468 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3469 { 3470 PetscErrorCode ierr; 3471 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3472 PetscScalar *array; 3473 Vec from,to; 3474 3475 PetscFunctionBegin; 3476 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3477 from = pcbddc->coarse_vec; 3478 to = pcbddc->vec1_P; 3479 } else { /* from local to global -> put data in coarse right hand side */ 3480 from = pcbddc->vec1_P; 3481 to = pcbddc->coarse_vec; 3482 } 3483 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3484 if (smode == SCATTER_FORWARD) { 3485 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3486 Vec tvec; 3487 3488 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3489 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3490 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3491 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3492 } 3493 } else { 3494 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3495 ierr = VecResetArray(from);CHKERRQ(ierr); 3496 } 3497 } 3498 PetscFunctionReturn(0); 3499 } 3500 3501 /* uncomment for testing purposes */ 3502 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3505 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3506 { 3507 PetscErrorCode ierr; 3508 PC_IS* pcis = (PC_IS*)(pc->data); 3509 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3510 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3511 /* one and zero */ 3512 PetscScalar one=1.0,zero=0.0; 3513 /* space to store constraints and their local indices */ 3514 PetscScalar *constraints_data; 3515 PetscInt *constraints_idxs,*constraints_idxs_B; 3516 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3517 PetscInt *constraints_n; 3518 /* iterators */ 3519 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3520 /* BLAS integers */ 3521 PetscBLASInt lwork,lierr; 3522 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3523 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3524 /* reuse */ 3525 PetscInt olocal_primal_size,olocal_primal_size_cc; 3526 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3527 /* change of basis */ 3528 PetscBool qr_needed; 3529 PetscBT change_basis,qr_needed_idx; 3530 /* auxiliary stuff */ 3531 PetscInt *nnz,*is_indices; 3532 PetscInt ncc; 3533 /* some quantities */ 3534 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3535 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3536 3537 PetscFunctionBegin; 3538 /* Destroy Mat objects computed previously */ 3539 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3540 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3541 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3542 /* save info on constraints from previous setup (if any) */ 3543 olocal_primal_size = pcbddc->local_primal_size; 3544 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3545 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3546 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3547 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3548 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3549 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3550 3551 if (!pcbddc->adaptive_selection) { 3552 IS ISForVertices,*ISForFaces,*ISForEdges; 3553 MatNullSpace nearnullsp; 3554 const Vec *nearnullvecs; 3555 Vec *localnearnullsp; 3556 PetscScalar *array; 3557 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3558 PetscBool nnsp_has_cnst; 3559 /* LAPACK working arrays for SVD or POD */ 3560 PetscBool skip_lapack,boolforchange; 3561 PetscScalar *work; 3562 PetscReal *singular_vals; 3563 #if defined(PETSC_USE_COMPLEX) 3564 PetscReal *rwork; 3565 #endif 3566 #if defined(PETSC_MISSING_LAPACK_GESVD) 3567 PetscScalar *temp_basis,*correlation_mat; 3568 #else 3569 PetscBLASInt dummy_int=1; 3570 PetscScalar dummy_scalar=1.; 3571 #endif 3572 3573 /* Get index sets for faces, edges and vertices from graph */ 3574 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3575 /* print some info */ 3576 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3577 PetscInt nv; 3578 3579 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3580 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3581 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3587 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3588 } 3589 3590 /* free unneeded index sets */ 3591 if (!pcbddc->use_vertices) { 3592 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3593 } 3594 if (!pcbddc->use_edges) { 3595 for (i=0;i<n_ISForEdges;i++) { 3596 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3597 } 3598 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3599 n_ISForEdges = 0; 3600 } 3601 if (!pcbddc->use_faces) { 3602 for (i=0;i<n_ISForFaces;i++) { 3603 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3604 } 3605 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3606 n_ISForFaces = 0; 3607 } 3608 3609 /* check if near null space is attached to global mat */ 3610 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3611 if (nearnullsp) { 3612 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3613 /* remove any stored info */ 3614 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3615 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3616 /* store information for BDDC solver reuse */ 3617 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3618 pcbddc->onearnullspace = nearnullsp; 3619 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3620 for (i=0;i<nnsp_size;i++) { 3621 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3622 } 3623 } else { /* if near null space is not provided BDDC uses constants by default */ 3624 nnsp_size = 0; 3625 nnsp_has_cnst = PETSC_TRUE; 3626 } 3627 /* get max number of constraints on a single cc */ 3628 max_constraints = nnsp_size; 3629 if (nnsp_has_cnst) max_constraints++; 3630 3631 /* 3632 Evaluate maximum storage size needed by the procedure 3633 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3634 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3635 There can be multiple constraints per connected component 3636 */ 3637 n_vertices = 0; 3638 if (ISForVertices) { 3639 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3640 } 3641 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3642 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3643 3644 total_counts = n_ISForFaces+n_ISForEdges; 3645 total_counts *= max_constraints; 3646 total_counts += n_vertices; 3647 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3648 3649 total_counts = 0; 3650 max_size_of_constraint = 0; 3651 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3652 IS used_is; 3653 if (i<n_ISForEdges) { 3654 used_is = ISForEdges[i]; 3655 } else { 3656 used_is = ISForFaces[i-n_ISForEdges]; 3657 } 3658 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3659 total_counts += j; 3660 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3661 } 3662 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3663 3664 /* get local part of global near null space vectors */ 3665 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3666 for (k=0;k<nnsp_size;k++) { 3667 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3668 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3669 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3670 } 3671 3672 /* whether or not to skip lapack calls */ 3673 skip_lapack = PETSC_TRUE; 3674 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3675 3676 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3677 if (!skip_lapack) { 3678 PetscScalar temp_work; 3679 3680 #if defined(PETSC_MISSING_LAPACK_GESVD) 3681 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3682 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3683 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3684 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3685 #if defined(PETSC_USE_COMPLEX) 3686 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3687 #endif 3688 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3689 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3690 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3691 lwork = -1; 3692 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3693 #if !defined(PETSC_USE_COMPLEX) 3694 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3695 #else 3696 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3697 #endif 3698 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3699 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3700 #else /* on missing GESVD */ 3701 /* SVD */ 3702 PetscInt max_n,min_n; 3703 max_n = max_size_of_constraint; 3704 min_n = max_constraints; 3705 if (max_size_of_constraint < max_constraints) { 3706 min_n = max_size_of_constraint; 3707 max_n = max_constraints; 3708 } 3709 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3710 #if defined(PETSC_USE_COMPLEX) 3711 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3712 #endif 3713 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3714 lwork = -1; 3715 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3716 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3717 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3719 #if !defined(PETSC_USE_COMPLEX) 3720 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3721 #else 3722 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3723 #endif 3724 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3725 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3726 #endif /* on missing GESVD */ 3727 /* Allocate optimal workspace */ 3728 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3729 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3730 } 3731 /* Now we can loop on constraining sets */ 3732 total_counts = 0; 3733 constraints_idxs_ptr[0] = 0; 3734 constraints_data_ptr[0] = 0; 3735 /* vertices */ 3736 if (n_vertices) { 3737 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3738 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3739 for (i=0;i<n_vertices;i++) { 3740 constraints_n[total_counts] = 1; 3741 constraints_data[total_counts] = 1.0; 3742 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3743 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3744 total_counts++; 3745 } 3746 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3747 n_vertices = total_counts; 3748 } 3749 3750 /* edges and faces */ 3751 total_counts_cc = total_counts; 3752 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3753 IS used_is; 3754 PetscBool idxs_copied = PETSC_FALSE; 3755 3756 if (ncc<n_ISForEdges) { 3757 used_is = ISForEdges[ncc]; 3758 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3759 } else { 3760 used_is = ISForFaces[ncc-n_ISForEdges]; 3761 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3762 } 3763 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3764 3765 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3766 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3767 /* change of basis should not be performed on local periodic nodes */ 3768 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3769 if (nnsp_has_cnst) { 3770 PetscScalar quad_value; 3771 3772 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3773 idxs_copied = PETSC_TRUE; 3774 3775 if (!pcbddc->use_nnsp_true) { 3776 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3777 } else { 3778 quad_value = 1.0; 3779 } 3780 for (j=0;j<size_of_constraint;j++) { 3781 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3782 } 3783 temp_constraints++; 3784 total_counts++; 3785 } 3786 for (k=0;k<nnsp_size;k++) { 3787 PetscReal real_value; 3788 PetscScalar *ptr_to_data; 3789 3790 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3791 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3792 for (j=0;j<size_of_constraint;j++) { 3793 ptr_to_data[j] = array[is_indices[j]]; 3794 } 3795 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3796 /* check if array is null on the connected component */ 3797 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3798 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3799 if (real_value > 0.0) { /* keep indices and values */ 3800 temp_constraints++; 3801 total_counts++; 3802 if (!idxs_copied) { 3803 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3804 idxs_copied = PETSC_TRUE; 3805 } 3806 } 3807 } 3808 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3809 valid_constraints = temp_constraints; 3810 if (!pcbddc->use_nnsp_true && temp_constraints) { 3811 if (temp_constraints == 1) { /* just normalize the constraint */ 3812 PetscScalar norm,*ptr_to_data; 3813 3814 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3816 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3817 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3818 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3819 } else { /* perform SVD */ 3820 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3821 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3822 3823 #if defined(PETSC_MISSING_LAPACK_GESVD) 3824 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3825 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3826 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3827 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3828 from that computed using LAPACKgesvd 3829 -> This is due to a different computation of eigenvectors in LAPACKheev 3830 -> The quality of the POD-computed basis will be the same */ 3831 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3832 /* Store upper triangular part of correlation matrix */ 3833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3834 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3835 for (j=0;j<temp_constraints;j++) { 3836 for (k=0;k<j+1;k++) { 3837 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3838 } 3839 } 3840 /* compute eigenvalues and eigenvectors of correlation matrix */ 3841 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3842 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3843 #if !defined(PETSC_USE_COMPLEX) 3844 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3845 #else 3846 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3847 #endif 3848 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3849 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3850 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3851 j = 0; 3852 while (j < temp_constraints && singular_vals[j] < tol) j++; 3853 total_counts = total_counts-j; 3854 valid_constraints = temp_constraints-j; 3855 /* scale and copy POD basis into used quadrature memory */ 3856 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3857 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3858 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3859 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3860 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3862 if (j<temp_constraints) { 3863 PetscInt ii; 3864 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3865 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3866 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3867 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3868 for (k=0;k<temp_constraints-j;k++) { 3869 for (ii=0;ii<size_of_constraint;ii++) { 3870 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3871 } 3872 } 3873 } 3874 #else /* on missing GESVD */ 3875 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3877 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3878 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3879 #if !defined(PETSC_USE_COMPLEX) 3880 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3881 #else 3882 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3883 #endif 3884 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3885 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3886 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3887 k = temp_constraints; 3888 if (k > size_of_constraint) k = size_of_constraint; 3889 j = 0; 3890 while (j < k && singular_vals[k-j-1] < tol) j++; 3891 valid_constraints = k-j; 3892 total_counts = total_counts-temp_constraints+valid_constraints; 3893 #endif /* on missing GESVD */ 3894 } 3895 } 3896 /* update pointers information */ 3897 if (valid_constraints) { 3898 constraints_n[total_counts_cc] = valid_constraints; 3899 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3900 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3901 /* set change_of_basis flag */ 3902 if (boolforchange) { 3903 PetscBTSet(change_basis,total_counts_cc); 3904 } 3905 total_counts_cc++; 3906 } 3907 } 3908 /* free workspace */ 3909 if (!skip_lapack) { 3910 ierr = PetscFree(work);CHKERRQ(ierr); 3911 #if defined(PETSC_USE_COMPLEX) 3912 ierr = PetscFree(rwork);CHKERRQ(ierr); 3913 #endif 3914 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3915 #if defined(PETSC_MISSING_LAPACK_GESVD) 3916 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3917 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3918 #endif 3919 } 3920 for (k=0;k<nnsp_size;k++) { 3921 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3922 } 3923 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3924 /* free index sets of faces, edges and vertices */ 3925 for (i=0;i<n_ISForFaces;i++) { 3926 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3927 } 3928 if (n_ISForFaces) { 3929 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3930 } 3931 for (i=0;i<n_ISForEdges;i++) { 3932 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3933 } 3934 if (n_ISForEdges) { 3935 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3936 } 3937 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3938 } else { 3939 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3940 3941 total_counts = 0; 3942 n_vertices = 0; 3943 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3944 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3945 } 3946 max_constraints = 0; 3947 total_counts_cc = 0; 3948 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3949 total_counts += pcbddc->adaptive_constraints_n[i]; 3950 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3951 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3952 } 3953 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3954 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3955 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3956 constraints_data = pcbddc->adaptive_constraints_data; 3957 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3958 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3959 total_counts_cc = 0; 3960 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3961 if (pcbddc->adaptive_constraints_n[i]) { 3962 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3963 } 3964 } 3965 #if 0 3966 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3967 for (i=0;i<total_counts_cc;i++) { 3968 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3969 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3970 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3971 printf(" %d",constraints_idxs[j]); 3972 } 3973 printf("\n"); 3974 printf("number of cc: %d\n",constraints_n[i]); 3975 } 3976 for (i=0;i<n_vertices;i++) { 3977 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3978 } 3979 for (i=0;i<sub_schurs->n_subs;i++) { 3980 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3981 } 3982 #endif 3983 3984 max_size_of_constraint = 0; 3985 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3986 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3987 /* Change of basis */ 3988 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3989 if (pcbddc->use_change_of_basis) { 3990 for (i=0;i<sub_schurs->n_subs;i++) { 3991 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3992 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3993 } 3994 } 3995 } 3996 } 3997 pcbddc->local_primal_size = total_counts; 3998 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3999 4000 /* map constraints_idxs in boundary numbering */ 4001 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4002 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4003 4004 /* Create constraint matrix */ 4005 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4006 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4007 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4008 4009 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4010 /* determine if a QR strategy is needed for change of basis */ 4011 qr_needed = PETSC_FALSE; 4012 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4013 total_primal_vertices=0; 4014 pcbddc->local_primal_size_cc = 0; 4015 for (i=0;i<total_counts_cc;i++) { 4016 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4017 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4018 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4019 pcbddc->local_primal_size_cc += 1; 4020 } else if (PetscBTLookup(change_basis,i)) { 4021 for (k=0;k<constraints_n[i];k++) { 4022 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4023 } 4024 pcbddc->local_primal_size_cc += constraints_n[i]; 4025 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4026 PetscBTSet(qr_needed_idx,i); 4027 qr_needed = PETSC_TRUE; 4028 } 4029 } else { 4030 pcbddc->local_primal_size_cc += 1; 4031 } 4032 } 4033 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4034 pcbddc->n_vertices = total_primal_vertices; 4035 /* permute indices in order to have a sorted set of vertices */ 4036 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4037 4038 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4039 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4040 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4041 4042 /* nonzero structure of constraint matrix */ 4043 /* and get reference dof for local constraints */ 4044 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4045 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4046 4047 j = total_primal_vertices; 4048 total_counts = total_primal_vertices; 4049 cum = total_primal_vertices; 4050 for (i=n_vertices;i<total_counts_cc;i++) { 4051 if (!PetscBTLookup(change_basis,i)) { 4052 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4053 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4054 cum++; 4055 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4056 for (k=0;k<constraints_n[i];k++) { 4057 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4058 nnz[j+k] = size_of_constraint; 4059 } 4060 j += constraints_n[i]; 4061 } 4062 } 4063 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4064 ierr = PetscFree(nnz);CHKERRQ(ierr); 4065 4066 /* set values in constraint matrix */ 4067 for (i=0;i<total_primal_vertices;i++) { 4068 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4069 } 4070 total_counts = total_primal_vertices; 4071 for (i=n_vertices;i<total_counts_cc;i++) { 4072 if (!PetscBTLookup(change_basis,i)) { 4073 PetscInt *cols; 4074 4075 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4076 cols = constraints_idxs+constraints_idxs_ptr[i]; 4077 for (k=0;k<constraints_n[i];k++) { 4078 PetscInt row = total_counts+k; 4079 PetscScalar *vals; 4080 4081 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4082 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4083 } 4084 total_counts += constraints_n[i]; 4085 } 4086 } 4087 /* assembling */ 4088 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4089 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 4091 /* 4092 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4093 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4094 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4095 */ 4096 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4097 if (pcbddc->use_change_of_basis) { 4098 /* dual and primal dofs on a single cc */ 4099 PetscInt dual_dofs,primal_dofs; 4100 /* working stuff for GEQRF */ 4101 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4102 PetscBLASInt lqr_work; 4103 /* working stuff for UNGQR */ 4104 PetscScalar *gqr_work,lgqr_work_t; 4105 PetscBLASInt lgqr_work; 4106 /* working stuff for TRTRS */ 4107 PetscScalar *trs_rhs; 4108 PetscBLASInt Blas_NRHS; 4109 /* pointers for values insertion into change of basis matrix */ 4110 PetscInt *start_rows,*start_cols; 4111 PetscScalar *start_vals; 4112 /* working stuff for values insertion */ 4113 PetscBT is_primal; 4114 PetscInt *aux_primal_numbering_B; 4115 /* matrix sizes */ 4116 PetscInt global_size,local_size; 4117 /* temporary change of basis */ 4118 Mat localChangeOfBasisMatrix; 4119 /* extra space for debugging */ 4120 PetscScalar *dbg_work; 4121 4122 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4123 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4124 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4125 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4126 /* nonzeros for local mat */ 4127 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4128 if (!pcbddc->benign_change || pcbddc->fake_change) { 4129 for (i=0;i<pcis->n;i++) nnz[i]=1; 4130 } else { 4131 const PetscInt *ii; 4132 PetscInt n; 4133 PetscBool flg_row; 4134 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4135 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4136 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4137 } 4138 for (i=n_vertices;i<total_counts_cc;i++) { 4139 if (PetscBTLookup(change_basis,i)) { 4140 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4141 if (PetscBTLookup(qr_needed_idx,i)) { 4142 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4143 } else { 4144 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4145 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4146 } 4147 } 4148 } 4149 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4150 ierr = PetscFree(nnz);CHKERRQ(ierr); 4151 /* Set interior change in the matrix */ 4152 if (!pcbddc->benign_change || pcbddc->fake_change) { 4153 for (i=0;i<pcis->n;i++) { 4154 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4155 } 4156 } else { 4157 const PetscInt *ii,*jj; 4158 PetscScalar *aa; 4159 PetscInt n; 4160 PetscBool flg_row; 4161 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4162 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4163 for (i=0;i<n;i++) { 4164 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4165 } 4166 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4167 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4168 } 4169 4170 if (pcbddc->dbg_flag) { 4171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4172 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4173 } 4174 4175 4176 /* Now we loop on the constraints which need a change of basis */ 4177 /* 4178 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4179 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4180 4181 Basic blocks of change of basis matrix T computed by 4182 4183 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4184 4185 | 1 0 ... 0 s_1/S | 4186 | 0 1 ... 0 s_2/S | 4187 | ... | 4188 | 0 ... 1 s_{n-1}/S | 4189 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4190 4191 with S = \sum_{i=1}^n s_i^2 4192 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4193 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4194 4195 - QR decomposition of constraints otherwise 4196 */ 4197 if (qr_needed) { 4198 /* space to store Q */ 4199 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4200 /* first we issue queries for optimal work */ 4201 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4202 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4203 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4204 lqr_work = -1; 4205 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4206 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4207 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4208 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4209 lgqr_work = -1; 4210 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4211 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4212 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4213 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4214 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4215 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4216 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4217 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4218 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4219 /* array to store scaling factors for reflectors */ 4220 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4221 /* array to store rhs and solution of triangular solver */ 4222 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4223 /* allocating workspace for check */ 4224 if (pcbddc->dbg_flag) { 4225 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4226 } 4227 } 4228 /* array to store whether a node is primal or not */ 4229 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4230 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4231 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4232 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4233 for (i=0;i<total_primal_vertices;i++) { 4234 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4235 } 4236 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4237 4238 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4239 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4240 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4241 if (PetscBTLookup(change_basis,total_counts)) { 4242 /* get constraint info */ 4243 primal_dofs = constraints_n[total_counts]; 4244 dual_dofs = size_of_constraint-primal_dofs; 4245 4246 if (pcbddc->dbg_flag) { 4247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4248 } 4249 4250 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4251 4252 /* copy quadrature constraints for change of basis check */ 4253 if (pcbddc->dbg_flag) { 4254 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4255 } 4256 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4257 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4258 4259 /* compute QR decomposition of constraints */ 4260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4261 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4262 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4264 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4265 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4266 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4267 4268 /* explictly compute R^-T */ 4269 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4270 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4271 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4272 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4273 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4274 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4275 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4276 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4277 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4279 4280 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4282 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4283 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4286 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4287 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4289 4290 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4291 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4292 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4293 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4294 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4295 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4296 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4300 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4301 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4302 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4303 4304 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4305 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4306 /* insert cols for primal dofs */ 4307 for (j=0;j<primal_dofs;j++) { 4308 start_vals = &qr_basis[j*size_of_constraint]; 4309 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4310 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4311 } 4312 /* insert cols for dual dofs */ 4313 for (j=0,k=0;j<dual_dofs;k++) { 4314 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4315 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4316 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4317 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4318 j++; 4319 } 4320 } 4321 4322 /* check change of basis */ 4323 if (pcbddc->dbg_flag) { 4324 PetscInt ii,jj; 4325 PetscBool valid_qr=PETSC_TRUE; 4326 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4332 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4333 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4334 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4335 for (jj=0;jj<size_of_constraint;jj++) { 4336 for (ii=0;ii<primal_dofs;ii++) { 4337 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4338 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4339 } 4340 } 4341 if (!valid_qr) { 4342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4343 for (jj=0;jj<size_of_constraint;jj++) { 4344 for (ii=0;ii<primal_dofs;ii++) { 4345 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4346 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4347 } 4348 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4349 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4350 } 4351 } 4352 } 4353 } else { 4354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4355 } 4356 } 4357 } else { /* simple transformation block */ 4358 PetscInt row,col; 4359 PetscScalar val,norm; 4360 4361 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4362 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4363 for (j=0;j<size_of_constraint;j++) { 4364 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4365 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4366 if (!PetscBTLookup(is_primal,row_B)) { 4367 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4368 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4369 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4370 } else { 4371 for (k=0;k<size_of_constraint;k++) { 4372 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4373 if (row != col) { 4374 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4375 } else { 4376 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4377 } 4378 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4379 } 4380 } 4381 } 4382 if (pcbddc->dbg_flag) { 4383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4384 } 4385 } 4386 } else { 4387 if (pcbddc->dbg_flag) { 4388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4389 } 4390 } 4391 } 4392 4393 /* free workspace */ 4394 if (qr_needed) { 4395 if (pcbddc->dbg_flag) { 4396 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4397 } 4398 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4399 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4400 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4401 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4402 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4403 } 4404 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4405 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4406 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4407 4408 /* assembling of global change of variable */ 4409 if (!pcbddc->fake_change) { 4410 Mat tmat; 4411 PetscInt bs; 4412 4413 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4414 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4415 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4416 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4417 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4418 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4419 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4420 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4421 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4422 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4423 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4424 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4425 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4426 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4427 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4428 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4429 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4430 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4431 4432 /* check */ 4433 if (pcbddc->dbg_flag) { 4434 PetscReal error; 4435 Vec x,x_change; 4436 4437 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4438 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4439 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4440 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4441 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4442 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4443 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4444 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4445 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4446 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4447 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4448 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4451 ierr = VecDestroy(&x);CHKERRQ(ierr); 4452 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4453 } 4454 /* adapt sub_schurs computed (if any) */ 4455 if (pcbddc->use_deluxe_scaling) { 4456 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4457 4458 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4459 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4460 } 4461 if (sub_schurs->S_Ej_all) { 4462 Mat S_new,tmat; 4463 IS is_all_N,is_V_Sall = NULL; 4464 4465 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4466 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4467 if (pcbddc->deluxe_zerorows) { 4468 ISLocalToGlobalMapping NtoSall; 4469 IS is_V; 4470 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4471 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4472 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4473 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4474 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4475 } 4476 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4477 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4478 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4479 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4480 if (pcbddc->deluxe_zerorows) { 4481 const PetscScalar *array; 4482 const PetscInt *idxs_V,*idxs_all; 4483 PetscInt i,n_V; 4484 4485 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4486 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4487 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4488 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4489 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4490 for (i=0;i<n_V;i++) { 4491 PetscScalar val; 4492 PetscInt idx; 4493 4494 idx = idxs_V[i]; 4495 val = array[idxs_all[idxs_V[i]]]; 4496 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4497 } 4498 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4501 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4502 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4503 } 4504 sub_schurs->S_Ej_all = S_new; 4505 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4506 if (sub_schurs->sum_S_Ej_all) { 4507 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4508 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4509 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4510 if (pcbddc->deluxe_zerorows) { 4511 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4512 } 4513 sub_schurs->sum_S_Ej_all = S_new; 4514 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4515 } 4516 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4517 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4518 } 4519 /* destroy any change of basis context in sub_schurs */ 4520 if (sub_schurs->change) { 4521 PetscInt i; 4522 4523 for (i=0;i<sub_schurs->n_subs;i++) { 4524 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4525 } 4526 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4527 } 4528 } 4529 if (pcbddc->switch_static) { /* need to save the local change */ 4530 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4531 } else { 4532 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4533 } 4534 /* determine if any process has changed the pressures locally */ 4535 pcbddc->change_interior = pcbddc->benign_have_null; 4536 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4537 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4538 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4539 pcbddc->use_qr_single = qr_needed; 4540 } 4541 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4542 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4543 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4544 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4545 } else { 4546 Mat benign_global = NULL; 4547 if (pcbddc->benign_have_null) { 4548 Mat tmat; 4549 4550 pcbddc->change_interior = PETSC_TRUE; 4551 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4552 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4553 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4554 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4555 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4556 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4558 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4559 if (pcbddc->benign_change) { 4560 Mat M; 4561 4562 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4563 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4564 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4565 ierr = MatDestroy(&M);CHKERRQ(ierr); 4566 } else { 4567 Mat eye; 4568 PetscScalar *array; 4569 4570 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4571 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4572 for (i=0;i<pcis->n;i++) { 4573 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4574 } 4575 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4576 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4577 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4578 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4579 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4580 } 4581 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4582 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4583 } 4584 if (pcbddc->user_ChangeOfBasisMatrix) { 4585 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4586 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4587 } else if (pcbddc->benign_have_null) { 4588 pcbddc->ChangeOfBasisMatrix = benign_global; 4589 } 4590 } 4591 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4592 IS is_global; 4593 const PetscInt *gidxs; 4594 4595 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4597 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4598 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4599 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4600 } 4601 } 4602 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4603 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4604 } 4605 4606 if (!pcbddc->fake_change) { 4607 /* add pressure dofs to set of primal nodes for numbering purposes */ 4608 for (i=0;i<pcbddc->benign_n;i++) { 4609 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4610 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4611 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4612 pcbddc->local_primal_size_cc++; 4613 pcbddc->local_primal_size++; 4614 } 4615 4616 /* check if a new primal space has been introduced (also take into account benign trick) */ 4617 pcbddc->new_primal_space_local = PETSC_TRUE; 4618 if (olocal_primal_size == pcbddc->local_primal_size) { 4619 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4620 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4621 if (!pcbddc->new_primal_space_local) { 4622 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4623 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4624 } 4625 } 4626 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4627 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4628 } 4629 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4630 4631 /* flush dbg viewer */ 4632 if (pcbddc->dbg_flag) { 4633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4634 } 4635 4636 /* free workspace */ 4637 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4638 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4639 if (!pcbddc->adaptive_selection) { 4640 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4641 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4642 } else { 4643 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4644 pcbddc->adaptive_constraints_idxs_ptr, 4645 pcbddc->adaptive_constraints_data_ptr, 4646 pcbddc->adaptive_constraints_idxs, 4647 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4648 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4649 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4650 } 4651 PetscFunctionReturn(0); 4652 } 4653 4654 #undef __FUNCT__ 4655 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4656 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4657 { 4658 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4659 PC_IS *pcis = (PC_IS*)pc->data; 4660 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4661 PetscInt ierr,i,N; 4662 4663 PetscFunctionBegin; 4664 /* Reset previously computed graph */ 4665 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4666 /* Init local Graph struct */ 4667 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4668 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4669 4670 /* Check validity of the csr graph passed in by the user */ 4671 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4672 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4673 } 4674 4675 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4676 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4677 PetscInt *xadj,*adjncy; 4678 PetscInt nvtxs; 4679 PetscBool flg_row=PETSC_FALSE; 4680 4681 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4682 if (flg_row) { 4683 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4684 pcbddc->computed_rowadj = PETSC_TRUE; 4685 } 4686 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4687 } 4688 if (pcbddc->dbg_flag) { 4689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4690 } 4691 4692 /* Setup of Graph */ 4693 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4694 4695 /* attach info on disconnected subdomains if present */ 4696 if (pcbddc->n_local_subs) { 4697 PetscInt *local_subs; 4698 4699 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4700 for (i=0;i<pcbddc->n_local_subs;i++) { 4701 const PetscInt *idxs; 4702 PetscInt nl,j; 4703 4704 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4705 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4706 for (j=0;j<nl;j++) { 4707 local_subs[idxs[j]] = i; 4708 } 4709 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4710 } 4711 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4712 pcbddc->mat_graph->local_subs = local_subs; 4713 } 4714 4715 /* Graph's connected components analysis */ 4716 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4717 PetscFunctionReturn(0); 4718 } 4719 4720 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4721 #undef __FUNCT__ 4722 #define __FUNCT__ "PCBDDCSubsetNumbering" 4723 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4724 { 4725 PetscSF sf; 4726 PetscLayout map; 4727 const PetscInt *idxs; 4728 PetscInt *leaf_data,*root_data,*gidxs; 4729 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4730 PetscInt n_n,nlocals,start,first_index; 4731 PetscMPIInt commsize; 4732 PetscBool first_found; 4733 PetscErrorCode ierr; 4734 4735 PetscFunctionBegin; 4736 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4737 if (subset_mult) { 4738 PetscCheckSameComm(subset,1,subset_mult,2); 4739 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4740 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4741 } 4742 /* create workspace layout for computing global indices of subset */ 4743 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4744 lbounds[0] = lbounds[1] = 0; 4745 for (i=0;i<n;i++) { 4746 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4747 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4748 } 4749 lbounds[0] = -lbounds[0]; 4750 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4751 gbounds[0] = -gbounds[0]; 4752 N = gbounds[1] - gbounds[0] + 1; 4753 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4754 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4755 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4756 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4757 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4758 4759 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4760 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4761 if (subset_mult) { 4762 const PetscInt* idxs_mult; 4763 4764 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4765 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4766 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4767 } else { 4768 for (i=0;i<n;i++) leaf_data[i] = 1; 4769 } 4770 /* local size of new subset */ 4771 n_n = 0; 4772 for (i=0;i<n;i++) n_n += leaf_data[i]; 4773 4774 /* global indexes in layout */ 4775 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4776 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4777 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4778 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4779 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4780 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4781 4782 /* reduce from leaves to roots */ 4783 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4784 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4785 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4786 4787 /* count indexes in local part of layout */ 4788 nlocals = 0; 4789 first_index = -1; 4790 first_found = PETSC_FALSE; 4791 for (i=0;i<Nl;i++) { 4792 if (!first_found && root_data[i]) { 4793 first_found = PETSC_TRUE; 4794 first_index = i; 4795 } 4796 nlocals += root_data[i]; 4797 } 4798 4799 /* cumulative of number of indexes and size of subset without holes */ 4800 #if defined(PETSC_HAVE_MPI_EXSCAN) 4801 start = 0; 4802 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4803 #else 4804 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4805 start = start-nlocals; 4806 #endif 4807 4808 if (N_n) { /* compute total size of new subset if requested */ 4809 *N_n = start + nlocals; 4810 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4811 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4812 } 4813 4814 /* adapt root data with cumulative */ 4815 if (first_found) { 4816 PetscInt old_index; 4817 4818 root_data[first_index] += start; 4819 old_index = first_index; 4820 for (i=first_index+1;i<Nl;i++) { 4821 if (root_data[i]) { 4822 root_data[i] += root_data[old_index]; 4823 old_index = i; 4824 } 4825 } 4826 } 4827 4828 /* from roots to leaves */ 4829 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4830 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4831 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4832 4833 /* create new IS with global indexes without holes */ 4834 if (subset_mult) { 4835 const PetscInt* idxs_mult; 4836 PetscInt cum; 4837 4838 cum = 0; 4839 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4840 for (i=0;i<n;i++) { 4841 PetscInt j; 4842 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4843 } 4844 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4845 } else { 4846 for (i=0;i<n;i++) { 4847 gidxs[i] = leaf_data[i]-1; 4848 } 4849 } 4850 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4851 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 #undef __FUNCT__ 4856 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4857 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4858 { 4859 PetscInt i,j; 4860 PetscScalar *alphas; 4861 PetscErrorCode ierr; 4862 4863 PetscFunctionBegin; 4864 /* this implements stabilized Gram-Schmidt */ 4865 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4866 for (i=0;i<n;i++) { 4867 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4868 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4869 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4870 } 4871 ierr = PetscFree(alphas);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "MatISGetSubassemblingPattern" 4877 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4878 { 4879 Mat A; 4880 PetscInt n_neighs,*neighs,*n_shared,**shared; 4881 PetscMPIInt size,rank,color; 4882 PetscInt *xadj,*adjncy; 4883 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4884 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4885 PetscInt void_procs,*procs_candidates = NULL; 4886 PetscInt xadj_count, *count; 4887 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4888 PetscSubcomm psubcomm; 4889 MPI_Comm subcomm; 4890 PetscErrorCode ierr; 4891 4892 PetscFunctionBegin; 4893 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4894 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4895 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4896 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4897 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4898 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4899 4900 if (have_void) *have_void = PETSC_FALSE; 4901 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4902 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4903 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4904 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4905 im_active = !!(n); 4906 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4907 void_procs = size - active_procs; 4908 /* get ranks of of non-active processes in mat communicator */ 4909 if (void_procs) { 4910 PetscInt ncand; 4911 4912 if (have_void) *have_void = PETSC_TRUE; 4913 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4914 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4915 for (i=0,ncand=0;i<size;i++) { 4916 if (!procs_candidates[i]) { 4917 procs_candidates[ncand++] = i; 4918 } 4919 } 4920 /* force n_subdomains to be not greater that the number of non-active processes */ 4921 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4922 } 4923 4924 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4925 if (active_procs < *n_subdomains) { 4926 PetscInt issize,isidx; 4927 if (im_active) { 4928 issize = 1; 4929 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4930 isidx = procs_candidates[rank]; 4931 } else { 4932 isidx = rank; 4933 } 4934 } else { 4935 issize = 0; 4936 isidx = -1; 4937 } 4938 *n_subdomains = active_procs; 4939 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4940 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4941 PetscFunctionReturn(0); 4942 } 4943 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4944 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4945 threshold = PetscMax(threshold,2); 4946 4947 /* Get info on mapping */ 4948 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4949 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4950 4951 /* build local CSR graph of subdomains' connectivity */ 4952 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4953 xadj[0] = 0; 4954 xadj[1] = PetscMax(n_neighs-1,0); 4955 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4956 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4957 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4958 for (i=1;i<n_neighs;i++) 4959 for (j=0;j<n_shared[i];j++) 4960 count[shared[i][j]] += 1; 4961 4962 xadj_count = 0; 4963 for (i=1;i<n_neighs;i++) { 4964 for (j=0;j<n_shared[i];j++) { 4965 if (count[shared[i][j]] < threshold) { 4966 adjncy[xadj_count] = neighs[i]; 4967 adjncy_wgt[xadj_count] = n_shared[i]; 4968 xadj_count++; 4969 break; 4970 } 4971 } 4972 } 4973 xadj[1] = xadj_count; 4974 ierr = PetscFree(count);CHKERRQ(ierr); 4975 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4976 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4977 4978 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4979 4980 /* Restrict work on active processes only */ 4981 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4982 if (void_procs) { 4983 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4984 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4985 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4986 subcomm = PetscSubcommChild(psubcomm); 4987 } else { 4988 psubcomm = NULL; 4989 subcomm = PetscObjectComm((PetscObject)mat); 4990 } 4991 4992 v_wgt = NULL; 4993 if (!color) { 4994 ierr = PetscFree(xadj);CHKERRQ(ierr); 4995 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4996 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4997 } else { 4998 Mat subdomain_adj; 4999 IS new_ranks,new_ranks_contig; 5000 MatPartitioning partitioner; 5001 PetscInt rstart=0,rend=0; 5002 PetscInt *is_indices,*oldranks; 5003 PetscMPIInt size; 5004 PetscBool aggregate; 5005 5006 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5007 if (void_procs) { 5008 PetscInt prank = rank; 5009 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5010 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5011 for (i=0;i<xadj[1];i++) { 5012 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5013 } 5014 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5015 } else { 5016 oldranks = NULL; 5017 } 5018 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5019 if (aggregate) { /* TODO: all this part could be made more efficient */ 5020 PetscInt lrows,row,ncols,*cols; 5021 PetscMPIInt nrank; 5022 PetscScalar *vals; 5023 5024 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5025 lrows = 0; 5026 if (nrank<redprocs) { 5027 lrows = size/redprocs; 5028 if (nrank<size%redprocs) lrows++; 5029 } 5030 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5031 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5032 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5033 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5034 row = nrank; 5035 ncols = xadj[1]-xadj[0]; 5036 cols = adjncy; 5037 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5038 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5039 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5040 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5041 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5042 ierr = PetscFree(xadj);CHKERRQ(ierr); 5043 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5044 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5045 ierr = PetscFree(vals);CHKERRQ(ierr); 5046 if (use_vwgt) { 5047 Vec v; 5048 const PetscScalar *array; 5049 PetscInt nl; 5050 5051 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5052 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5053 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5054 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5055 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5056 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5057 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5058 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5059 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5060 ierr = VecDestroy(&v);CHKERRQ(ierr); 5061 } 5062 } else { 5063 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5064 if (use_vwgt) { 5065 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5066 v_wgt[0] = local_size; 5067 } 5068 } 5069 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5070 5071 /* Partition */ 5072 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5073 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5074 if (v_wgt) { 5075 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5076 } 5077 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5078 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5079 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5080 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5081 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5082 5083 /* renumber new_ranks to avoid "holes" in new set of processors */ 5084 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5085 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5086 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5087 if (!aggregate) { 5088 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5089 #if defined(PETSC_USE_DEBUG) 5090 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5091 #endif 5092 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5093 } else if (oldranks) { 5094 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5095 } else { 5096 ranks_send_to_idx[0] = is_indices[0]; 5097 } 5098 } else { 5099 PetscInt idxs[1]; 5100 PetscMPIInt tag; 5101 MPI_Request *reqs; 5102 5103 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5104 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5105 for (i=rstart;i<rend;i++) { 5106 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5107 } 5108 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5109 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5110 ierr = PetscFree(reqs);CHKERRQ(ierr); 5111 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5112 #if defined(PETSC_USE_DEBUG) 5113 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5114 #endif 5115 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5116 } else if (oldranks) { 5117 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5118 } else { 5119 ranks_send_to_idx[0] = idxs[0]; 5120 } 5121 } 5122 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5123 /* clean up */ 5124 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5125 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5126 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5127 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5128 } 5129 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5130 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5131 5132 /* assemble parallel IS for sends */ 5133 i = 1; 5134 if (!color) i=0; 5135 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5136 PetscFunctionReturn(0); 5137 } 5138 5139 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5140 5141 #undef __FUNCT__ 5142 #define __FUNCT__ "MatISSubassemble" 5143 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5144 { 5145 Mat local_mat; 5146 IS is_sends_internal; 5147 PetscInt rows,cols,new_local_rows; 5148 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5149 PetscBool ismatis,isdense,newisdense,destroy_mat; 5150 ISLocalToGlobalMapping l2gmap; 5151 PetscInt* l2gmap_indices; 5152 const PetscInt* is_indices; 5153 MatType new_local_type; 5154 /* buffers */ 5155 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5156 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5157 PetscInt *recv_buffer_idxs_local; 5158 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5159 /* MPI */ 5160 MPI_Comm comm,comm_n; 5161 PetscSubcomm subcomm; 5162 PetscMPIInt n_sends,n_recvs,commsize; 5163 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5164 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5165 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5166 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5167 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5168 PetscErrorCode ierr; 5169 5170 PetscFunctionBegin; 5171 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5172 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5173 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5174 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5175 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5176 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5177 PetscValidLogicalCollectiveBool(mat,reuse,6); 5178 PetscValidLogicalCollectiveInt(mat,nis,8); 5179 5180 /* further checks */ 5181 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5182 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5183 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5184 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5185 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5186 if (reuse && *mat_n) { 5187 PetscInt mrows,mcols,mnrows,mncols; 5188 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5189 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5190 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5191 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5192 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5193 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5194 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5195 } 5196 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5197 PetscValidLogicalCollectiveInt(mat,bs,0); 5198 5199 /* prepare IS for sending if not provided */ 5200 if (!is_sends) { 5201 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5202 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5203 } else { 5204 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5205 is_sends_internal = is_sends; 5206 } 5207 5208 /* get comm */ 5209 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5210 5211 /* compute number of sends */ 5212 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5213 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5214 5215 /* compute number of receives */ 5216 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5218 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5219 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5220 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5221 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5222 ierr = PetscFree(iflags);CHKERRQ(ierr); 5223 5224 /* restrict comm if requested */ 5225 subcomm = 0; 5226 destroy_mat = PETSC_FALSE; 5227 if (restrict_comm) { 5228 PetscMPIInt color,subcommsize; 5229 5230 color = 0; 5231 if (restrict_full) { 5232 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5233 } else { 5234 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5235 } 5236 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5237 subcommsize = commsize - subcommsize; 5238 /* check if reuse has been requested */ 5239 if (reuse) { 5240 if (*mat_n) { 5241 PetscMPIInt subcommsize2; 5242 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5243 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5244 comm_n = PetscObjectComm((PetscObject)*mat_n); 5245 } else { 5246 comm_n = PETSC_COMM_SELF; 5247 } 5248 } else { /* MAT_INITIAL_MATRIX */ 5249 PetscMPIInt rank; 5250 5251 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5252 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5253 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5254 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5255 comm_n = PetscSubcommChild(subcomm); 5256 } 5257 /* flag to destroy *mat_n if not significative */ 5258 if (color) destroy_mat = PETSC_TRUE; 5259 } else { 5260 comm_n = comm; 5261 } 5262 5263 /* prepare send/receive buffers */ 5264 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5265 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5266 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5267 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5268 if (nis) { 5269 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5270 } 5271 5272 /* Get data from local matrices */ 5273 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5274 /* TODO: See below some guidelines on how to prepare the local buffers */ 5275 /* 5276 send_buffer_vals should contain the raw values of the local matrix 5277 send_buffer_idxs should contain: 5278 - MatType_PRIVATE type 5279 - PetscInt size_of_l2gmap 5280 - PetscInt global_row_indices[size_of_l2gmap] 5281 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5282 */ 5283 else { 5284 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5285 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5286 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5287 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5288 send_buffer_idxs[1] = i; 5289 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5290 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5291 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5292 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5293 for (i=0;i<n_sends;i++) { 5294 ilengths_vals[is_indices[i]] = len*len; 5295 ilengths_idxs[is_indices[i]] = len+2; 5296 } 5297 } 5298 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5299 /* additional is (if any) */ 5300 if (nis) { 5301 PetscMPIInt psum; 5302 PetscInt j; 5303 for (j=0,psum=0;j<nis;j++) { 5304 PetscInt plen; 5305 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5306 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5307 psum += len+1; /* indices + lenght */ 5308 } 5309 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5310 for (j=0,psum=0;j<nis;j++) { 5311 PetscInt plen; 5312 const PetscInt *is_array_idxs; 5313 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5314 send_buffer_idxs_is[psum] = plen; 5315 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5316 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5317 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5318 psum += plen+1; /* indices + lenght */ 5319 } 5320 for (i=0;i<n_sends;i++) { 5321 ilengths_idxs_is[is_indices[i]] = psum; 5322 } 5323 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5324 } 5325 5326 buf_size_idxs = 0; 5327 buf_size_vals = 0; 5328 buf_size_idxs_is = 0; 5329 for (i=0;i<n_recvs;i++) { 5330 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5331 buf_size_vals += (PetscInt)olengths_vals[i]; 5332 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5333 } 5334 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5335 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5336 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5337 5338 /* get new tags for clean communications */ 5339 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5340 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5341 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5342 5343 /* allocate for requests */ 5344 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5345 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5346 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5347 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5348 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5349 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5350 5351 /* communications */ 5352 ptr_idxs = recv_buffer_idxs; 5353 ptr_vals = recv_buffer_vals; 5354 ptr_idxs_is = recv_buffer_idxs_is; 5355 for (i=0;i<n_recvs;i++) { 5356 source_dest = onodes[i]; 5357 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5358 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5359 ptr_idxs += olengths_idxs[i]; 5360 ptr_vals += olengths_vals[i]; 5361 if (nis) { 5362 source_dest = onodes_is[i]; 5363 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5364 ptr_idxs_is += olengths_idxs_is[i]; 5365 } 5366 } 5367 for (i=0;i<n_sends;i++) { 5368 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5369 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5370 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5371 if (nis) { 5372 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5373 } 5374 } 5375 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5376 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5377 5378 /* assemble new l2g map */ 5379 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5380 ptr_idxs = recv_buffer_idxs; 5381 new_local_rows = 0; 5382 for (i=0;i<n_recvs;i++) { 5383 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5384 ptr_idxs += olengths_idxs[i]; 5385 } 5386 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5387 ptr_idxs = recv_buffer_idxs; 5388 new_local_rows = 0; 5389 for (i=0;i<n_recvs;i++) { 5390 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5391 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5392 ptr_idxs += olengths_idxs[i]; 5393 } 5394 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5395 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5396 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5397 5398 /* infer new local matrix type from received local matrices type */ 5399 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5400 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5401 if (n_recvs) { 5402 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5403 ptr_idxs = recv_buffer_idxs; 5404 for (i=0;i<n_recvs;i++) { 5405 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5406 new_local_type_private = MATAIJ_PRIVATE; 5407 break; 5408 } 5409 ptr_idxs += olengths_idxs[i]; 5410 } 5411 switch (new_local_type_private) { 5412 case MATDENSE_PRIVATE: 5413 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5414 new_local_type = MATSEQAIJ; 5415 bs = 1; 5416 } else { /* if I receive only 1 dense matrix */ 5417 new_local_type = MATSEQDENSE; 5418 bs = 1; 5419 } 5420 break; 5421 case MATAIJ_PRIVATE: 5422 new_local_type = MATSEQAIJ; 5423 bs = 1; 5424 break; 5425 case MATBAIJ_PRIVATE: 5426 new_local_type = MATSEQBAIJ; 5427 break; 5428 case MATSBAIJ_PRIVATE: 5429 new_local_type = MATSEQSBAIJ; 5430 break; 5431 default: 5432 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5433 break; 5434 } 5435 } else { /* by default, new_local_type is seqdense */ 5436 new_local_type = MATSEQDENSE; 5437 bs = 1; 5438 } 5439 5440 /* create MATIS object if needed */ 5441 if (!reuse) { 5442 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5443 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5444 } else { 5445 /* it also destroys the local matrices */ 5446 if (*mat_n) { 5447 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5448 } else { /* this is a fake object */ 5449 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5450 } 5451 } 5452 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5453 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5454 5455 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5456 5457 /* Global to local map of received indices */ 5458 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5459 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5460 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5461 5462 /* restore attributes -> type of incoming data and its size */ 5463 buf_size_idxs = 0; 5464 for (i=0;i<n_recvs;i++) { 5465 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5466 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5467 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5468 } 5469 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5470 5471 /* set preallocation */ 5472 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5473 if (!newisdense) { 5474 PetscInt *new_local_nnz=0; 5475 5476 ptr_idxs = recv_buffer_idxs_local; 5477 if (n_recvs) { 5478 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5479 } 5480 for (i=0;i<n_recvs;i++) { 5481 PetscInt j; 5482 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5483 for (j=0;j<*(ptr_idxs+1);j++) { 5484 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5485 } 5486 } else { 5487 /* TODO */ 5488 } 5489 ptr_idxs += olengths_idxs[i]; 5490 } 5491 if (new_local_nnz) { 5492 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5493 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5494 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5495 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5496 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5497 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5498 } else { 5499 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5500 } 5501 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5502 } else { 5503 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5504 } 5505 5506 /* set values */ 5507 ptr_vals = recv_buffer_vals; 5508 ptr_idxs = recv_buffer_idxs_local; 5509 for (i=0;i<n_recvs;i++) { 5510 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5511 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5512 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5513 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5514 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5515 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5516 } else { 5517 /* TODO */ 5518 } 5519 ptr_idxs += olengths_idxs[i]; 5520 ptr_vals += olengths_vals[i]; 5521 } 5522 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5523 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5525 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5527 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5528 5529 #if 0 5530 if (!restrict_comm) { /* check */ 5531 Vec lvec,rvec; 5532 PetscReal infty_error; 5533 5534 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5535 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5536 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5537 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5538 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5539 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5540 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5541 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5542 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5543 } 5544 #endif 5545 5546 /* assemble new additional is (if any) */ 5547 if (nis) { 5548 PetscInt **temp_idxs,*count_is,j,psum; 5549 5550 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5551 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5552 ptr_idxs = recv_buffer_idxs_is; 5553 psum = 0; 5554 for (i=0;i<n_recvs;i++) { 5555 for (j=0;j<nis;j++) { 5556 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5557 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5558 psum += plen; 5559 ptr_idxs += plen+1; /* shift pointer to received data */ 5560 } 5561 } 5562 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5563 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5564 for (i=1;i<nis;i++) { 5565 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5566 } 5567 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5568 ptr_idxs = recv_buffer_idxs_is; 5569 for (i=0;i<n_recvs;i++) { 5570 for (j=0;j<nis;j++) { 5571 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5572 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5573 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5574 ptr_idxs += plen+1; /* shift pointer to received data */ 5575 } 5576 } 5577 for (i=0;i<nis;i++) { 5578 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5579 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5580 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5581 } 5582 ierr = PetscFree(count_is);CHKERRQ(ierr); 5583 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5584 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5585 } 5586 /* free workspace */ 5587 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5588 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5589 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5590 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5591 if (isdense) { 5592 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5593 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5594 } else { 5595 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5596 } 5597 if (nis) { 5598 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5599 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5600 } 5601 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5602 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5603 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5604 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5605 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5606 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5607 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5608 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5609 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5610 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5611 ierr = PetscFree(onodes);CHKERRQ(ierr); 5612 if (nis) { 5613 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5614 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5615 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5616 } 5617 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5618 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5619 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5620 for (i=0;i<nis;i++) { 5621 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5622 } 5623 *mat_n = NULL; 5624 } 5625 PetscFunctionReturn(0); 5626 } 5627 5628 /* temporary hack into ksp private data structure */ 5629 #include <petsc/private/kspimpl.h> 5630 5631 #undef __FUNCT__ 5632 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5633 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5634 { 5635 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5636 PC_IS *pcis = (PC_IS*)pc->data; 5637 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5638 MatNullSpace CoarseNullSpace = NULL; 5639 ISLocalToGlobalMapping coarse_islg; 5640 IS coarse_is,*isarray; 5641 PetscInt i,im_active=-1,active_procs=-1; 5642 PetscInt nis,nisdofs,nisneu,nisvert; 5643 PC pc_temp; 5644 PCType coarse_pc_type; 5645 KSPType coarse_ksp_type; 5646 PetscBool multilevel_requested,multilevel_allowed; 5647 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5648 Mat t_coarse_mat_is; 5649 PetscInt ncoarse; 5650 PetscBool compute_vecs = PETSC_FALSE; 5651 PetscScalar *array; 5652 MatReuse coarse_mat_reuse; 5653 PetscBool restr, full_restr, have_void; 5654 PetscErrorCode ierr; 5655 5656 PetscFunctionBegin; 5657 /* Assign global numbering to coarse dofs */ 5658 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5659 PetscInt ocoarse_size; 5660 compute_vecs = PETSC_TRUE; 5661 ocoarse_size = pcbddc->coarse_size; 5662 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5663 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5664 /* see if we can avoid some work */ 5665 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5666 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5667 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5668 PC pc; 5669 PetscBool isbddc; 5670 5671 /* temporary workaround since PCBDDC does not have a reset method so far */ 5672 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5673 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5674 if (isbddc) { 5675 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5676 } else { 5677 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5678 } 5679 coarse_reuse = PETSC_FALSE; 5680 } else { /* we can safely reuse already computed coarse matrix */ 5681 coarse_reuse = PETSC_TRUE; 5682 } 5683 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5684 coarse_reuse = PETSC_FALSE; 5685 } 5686 /* reset any subassembling information */ 5687 if (!coarse_reuse || pcbddc->recompute_topography) { 5688 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5689 } 5690 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5691 coarse_reuse = PETSC_TRUE; 5692 } 5693 /* assemble coarse matrix */ 5694 if (coarse_reuse && pcbddc->coarse_ksp) { 5695 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5696 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5697 coarse_mat_reuse = MAT_REUSE_MATRIX; 5698 } else { 5699 coarse_mat = NULL; 5700 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5701 } 5702 5703 /* creates temporary l2gmap and IS for coarse indexes */ 5704 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5705 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5706 5707 /* creates temporary MATIS object for coarse matrix */ 5708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5709 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5710 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5711 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5712 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5713 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5714 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5715 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5716 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5717 5718 /* count "active" (i.e. with positive local size) and "void" processes */ 5719 im_active = !!(pcis->n); 5720 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5721 5722 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5723 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5724 /* full_restr : just use the receivers from the subassembling pattern */ 5725 coarse_mat_is = NULL; 5726 multilevel_allowed = PETSC_FALSE; 5727 multilevel_requested = PETSC_FALSE; 5728 full_restr = PETSC_TRUE; 5729 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5730 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5731 if (multilevel_requested) { 5732 ncoarse = active_procs/pcbddc->coarsening_ratio; 5733 restr = PETSC_FALSE; 5734 full_restr = PETSC_FALSE; 5735 } else { 5736 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5737 restr = PETSC_TRUE; 5738 full_restr = PETSC_TRUE; 5739 } 5740 ncoarse = PetscMax(1,ncoarse); 5741 if (!pcbddc->coarse_subassembling) { 5742 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5743 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5744 PetscInt psum; 5745 PetscMPIInt size; 5746 if (pcbddc->coarse_ksp) psum = 1; 5747 else psum = 0; 5748 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5749 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5750 if (ncoarse < size) have_void = PETSC_TRUE; 5751 } 5752 /* determine if we can go multilevel */ 5753 if (multilevel_requested) { 5754 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5755 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5756 } 5757 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5758 5759 /* dump subassembling pattern */ 5760 if (pcbddc->dbg_flag && multilevel_allowed) { 5761 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5762 } 5763 5764 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5765 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5766 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5767 const PetscInt *idxs; 5768 ISLocalToGlobalMapping tmap; 5769 5770 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5771 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5772 /* allocate space for temporary storage */ 5773 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5774 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5775 /* allocate for IS array */ 5776 nisdofs = pcbddc->n_ISForDofsLocal; 5777 nisneu = !!pcbddc->NeumannBoundariesLocal; 5778 nisvert = 0; /* nisvert is not used */ 5779 nis = nisdofs + nisneu + nisvert; 5780 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5781 /* dofs splitting */ 5782 for (i=0;i<nisdofs;i++) { 5783 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5784 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5785 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5786 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5787 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5788 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5789 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5790 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5791 } 5792 /* neumann boundaries */ 5793 if (pcbddc->NeumannBoundariesLocal) { 5794 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5795 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5796 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5797 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5798 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5799 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5800 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5801 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5802 } 5803 /* free memory */ 5804 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5805 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5806 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5807 } else { 5808 nis = 0; 5809 nisdofs = 0; 5810 nisneu = 0; 5811 nisvert = 0; 5812 isarray = NULL; 5813 } 5814 /* destroy no longer needed map */ 5815 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5816 5817 /* subassemble */ 5818 if (multilevel_allowed) { 5819 PetscBool reuse,reuser; 5820 if (coarse_mat) reuse = PETSC_TRUE; 5821 else reuse = PETSC_FALSE; 5822 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5823 if (reuser) { 5824 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5825 } else { 5826 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5827 } 5828 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5829 } else { 5830 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5831 } 5832 if (coarse_mat_is || coarse_mat) { 5833 PetscMPIInt size; 5834 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5835 if (!multilevel_allowed) { 5836 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5837 } else { 5838 Mat A; 5839 5840 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5841 if (coarse_mat_is) { 5842 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5843 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5844 coarse_mat = coarse_mat_is; 5845 } 5846 /* be sure we don't have MatSeqDENSE as local mat */ 5847 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5848 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5849 } 5850 } 5851 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5852 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5853 5854 /* create local to global scatters for coarse problem */ 5855 if (compute_vecs) { 5856 PetscInt lrows; 5857 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5858 if (coarse_mat) { 5859 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5860 } else { 5861 lrows = 0; 5862 } 5863 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5864 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5865 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5866 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5867 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5868 } 5869 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5870 5871 /* set defaults for coarse KSP and PC */ 5872 if (multilevel_allowed) { 5873 coarse_ksp_type = KSPRICHARDSON; 5874 coarse_pc_type = PCBDDC; 5875 } else { 5876 coarse_ksp_type = KSPPREONLY; 5877 coarse_pc_type = PCREDUNDANT; 5878 } 5879 5880 /* print some info if requested */ 5881 if (pcbddc->dbg_flag) { 5882 if (!multilevel_allowed) { 5883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5884 if (multilevel_requested) { 5885 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5886 } else if (pcbddc->max_levels) { 5887 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5888 } 5889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5890 } 5891 } 5892 5893 /* create the coarse KSP object only once with defaults */ 5894 if (coarse_mat) { 5895 PetscViewer dbg_viewer = NULL; 5896 if (pcbddc->dbg_flag) { 5897 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5898 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5899 } 5900 if (!pcbddc->coarse_ksp) { 5901 char prefix[256],str_level[16]; 5902 size_t len; 5903 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5904 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5905 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5906 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5907 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5908 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5909 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5910 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5911 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5912 /* prefix */ 5913 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5914 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5915 if (!pcbddc->current_level) { 5916 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5917 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5918 } else { 5919 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5920 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5921 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5922 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5923 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5924 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5925 } 5926 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5927 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5928 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5929 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5930 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5931 /* allow user customization */ 5932 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5933 } 5934 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5935 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5936 if (nisdofs) { 5937 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5938 for (i=0;i<nisdofs;i++) { 5939 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5940 } 5941 } 5942 if (nisneu) { 5943 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5944 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5945 } 5946 if (nisvert) { 5947 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5948 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5949 } 5950 5951 /* get some info after set from options */ 5952 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5953 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5954 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5955 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5956 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5957 isbddc = PETSC_FALSE; 5958 } 5959 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5960 if (isredundant) { 5961 KSP inner_ksp; 5962 PC inner_pc; 5963 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5964 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5965 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5966 } 5967 5968 /* parameters which miss an API */ 5969 if (isbddc) { 5970 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5971 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5972 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5973 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5974 if (pcbddc_coarse->benign_saddle_point) { 5975 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5976 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5977 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5978 } 5979 } 5980 5981 /* propagate symmetry info of coarse matrix */ 5982 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5983 if (pc->pmat->symmetric_set) { 5984 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5985 } 5986 if (pc->pmat->hermitian_set) { 5987 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5988 } 5989 if (pc->pmat->spd_set) { 5990 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5991 } 5992 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5993 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5994 } 5995 /* set operators */ 5996 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5997 if (pcbddc->dbg_flag) { 5998 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5999 } 6000 } 6001 ierr = PetscFree(isarray);CHKERRQ(ierr); 6002 #if 0 6003 { 6004 PetscViewer viewer; 6005 char filename[256]; 6006 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6007 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6008 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6009 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6010 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6011 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6012 } 6013 #endif 6014 6015 if (pcbddc->coarse_ksp) { 6016 Vec crhs,csol; 6017 6018 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6019 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6020 if (!csol) { 6021 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6022 } 6023 if (!crhs) { 6024 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6025 } 6026 } 6027 6028 /* compute null space for coarse solver if the benign trick has been requested */ 6029 if (pcbddc->benign_null) { 6030 6031 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6032 for (i=0;i<pcbddc->benign_n;i++) { 6033 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6034 } 6035 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6036 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6037 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6038 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6039 if (coarse_mat) { 6040 Vec nullv; 6041 PetscScalar *array,*array2; 6042 PetscInt nl; 6043 6044 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6045 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6046 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6047 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6048 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6049 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6050 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6051 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6052 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6053 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6054 } 6055 } 6056 6057 if (pcbddc->coarse_ksp) { 6058 PetscBool ispreonly; 6059 6060 if (CoarseNullSpace) { 6061 PetscBool isnull; 6062 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6063 if (isnull) { 6064 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6065 } 6066 /* TODO: add local nullspaces (if any) */ 6067 } 6068 /* setup coarse ksp */ 6069 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6070 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6071 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6072 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6073 KSP check_ksp; 6074 KSPType check_ksp_type; 6075 PC check_pc; 6076 Vec check_vec,coarse_vec; 6077 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6078 PetscInt its; 6079 PetscBool compute_eigs; 6080 PetscReal *eigs_r,*eigs_c; 6081 PetscInt neigs; 6082 const char *prefix; 6083 6084 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6085 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6086 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6087 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6088 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6089 /* prevent from setup unneeded object */ 6090 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6091 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6092 if (ispreonly) { 6093 check_ksp_type = KSPPREONLY; 6094 compute_eigs = PETSC_FALSE; 6095 } else { 6096 check_ksp_type = KSPGMRES; 6097 compute_eigs = PETSC_TRUE; 6098 } 6099 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6100 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6101 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6102 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6103 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6104 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6105 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6106 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6107 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6108 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6109 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6110 /* create random vec */ 6111 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6112 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6113 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6114 /* solve coarse problem */ 6115 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6116 /* set eigenvalue estimation if preonly has not been requested */ 6117 if (compute_eigs) { 6118 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6119 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6120 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6121 lambda_max = eigs_r[neigs-1]; 6122 lambda_min = eigs_r[0]; 6123 if (pcbddc->use_coarse_estimates) { 6124 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6125 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6126 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6127 } 6128 } 6129 } 6130 6131 /* check coarse problem residual error */ 6132 if (pcbddc->dbg_flag) { 6133 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6134 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6135 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6136 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6137 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6138 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6139 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6140 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6141 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6142 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6143 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6144 if (CoarseNullSpace) { 6145 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6146 } 6147 if (compute_eigs) { 6148 PetscReal lambda_max_s,lambda_min_s; 6149 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6150 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6151 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6152 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6153 for (i=0;i<neigs;i++) { 6154 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6155 } 6156 } 6157 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6158 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6159 } 6160 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6161 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6162 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6163 if (compute_eigs) { 6164 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6165 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6166 } 6167 } 6168 } 6169 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6170 /* print additional info */ 6171 if (pcbddc->dbg_flag) { 6172 /* waits until all processes reaches this point */ 6173 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6176 } 6177 6178 /* free memory */ 6179 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6180 PetscFunctionReturn(0); 6181 } 6182 6183 #undef __FUNCT__ 6184 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6185 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6186 { 6187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6188 PC_IS* pcis = (PC_IS*)pc->data; 6189 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6190 IS subset,subset_mult,subset_n; 6191 PetscInt local_size,coarse_size=0; 6192 PetscInt *local_primal_indices=NULL; 6193 const PetscInt *t_local_primal_indices; 6194 PetscErrorCode ierr; 6195 6196 PetscFunctionBegin; 6197 /* Compute global number of coarse dofs */ 6198 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6199 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6200 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6201 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6202 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6203 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6204 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6205 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6206 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6207 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6208 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6209 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6210 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6211 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6212 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6213 6214 /* check numbering */ 6215 if (pcbddc->dbg_flag) { 6216 PetscScalar coarsesum,*array,*array2; 6217 PetscInt i; 6218 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6219 6220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6222 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6223 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6224 /* counter */ 6225 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6226 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6227 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6228 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6229 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6230 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6231 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6232 for (i=0;i<pcbddc->local_primal_size;i++) { 6233 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6234 } 6235 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6236 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6237 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6238 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6239 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6240 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6241 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6242 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6243 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6244 for (i=0;i<pcis->n;i++) { 6245 if (array[i] != 0.0 && array[i] != array2[i]) { 6246 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6247 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6248 set_error = PETSC_TRUE; 6249 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6251 } 6252 } 6253 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6254 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6256 for (i=0;i<pcis->n;i++) { 6257 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6258 } 6259 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6260 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6261 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6262 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6263 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6265 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6266 PetscInt *gidxs; 6267 6268 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6269 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6273 for (i=0;i<pcbddc->local_primal_size;i++) { 6274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6275 } 6276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6277 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6278 } 6279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6280 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6281 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6282 } 6283 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6284 /* get back data */ 6285 *coarse_size_n = coarse_size; 6286 *local_primal_indices_n = local_primal_indices; 6287 PetscFunctionReturn(0); 6288 } 6289 6290 #undef __FUNCT__ 6291 #define __FUNCT__ "PCBDDCGlobalToLocal" 6292 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6293 { 6294 IS localis_t; 6295 PetscInt i,lsize,*idxs,n; 6296 PetscScalar *vals; 6297 PetscErrorCode ierr; 6298 6299 PetscFunctionBegin; 6300 /* get indices in local ordering exploiting local to global map */ 6301 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6302 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6303 for (i=0;i<lsize;i++) vals[i] = 1.0; 6304 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6305 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6306 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6307 if (idxs) { /* multilevel guard */ 6308 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6309 } 6310 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6311 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6312 ierr = PetscFree(vals);CHKERRQ(ierr); 6313 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6314 /* now compute set in local ordering */ 6315 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6316 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6317 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6318 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6319 for (i=0,lsize=0;i<n;i++) { 6320 if (PetscRealPart(vals[i]) > 0.5) { 6321 lsize++; 6322 } 6323 } 6324 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6325 for (i=0,lsize=0;i<n;i++) { 6326 if (PetscRealPart(vals[i]) > 0.5) { 6327 idxs[lsize++] = i; 6328 } 6329 } 6330 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6332 *localis = localis_t; 6333 PetscFunctionReturn(0); 6334 } 6335 6336 #undef __FUNCT__ 6337 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6338 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6339 { 6340 PC_IS *pcis=(PC_IS*)pc->data; 6341 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6342 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6343 Mat S_j; 6344 PetscInt *used_xadj,*used_adjncy; 6345 PetscBool free_used_adj; 6346 PetscErrorCode ierr; 6347 6348 PetscFunctionBegin; 6349 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6350 free_used_adj = PETSC_FALSE; 6351 if (pcbddc->sub_schurs_layers == -1) { 6352 used_xadj = NULL; 6353 used_adjncy = NULL; 6354 } else { 6355 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6356 used_xadj = pcbddc->mat_graph->xadj; 6357 used_adjncy = pcbddc->mat_graph->adjncy; 6358 } else if (pcbddc->computed_rowadj) { 6359 used_xadj = pcbddc->mat_graph->xadj; 6360 used_adjncy = pcbddc->mat_graph->adjncy; 6361 } else { 6362 PetscBool flg_row=PETSC_FALSE; 6363 const PetscInt *xadj,*adjncy; 6364 PetscInt nvtxs; 6365 6366 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6367 if (flg_row) { 6368 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6369 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6370 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6371 free_used_adj = PETSC_TRUE; 6372 } else { 6373 pcbddc->sub_schurs_layers = -1; 6374 used_xadj = NULL; 6375 used_adjncy = NULL; 6376 } 6377 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6378 } 6379 } 6380 6381 /* setup sub_schurs data */ 6382 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6383 if (!sub_schurs->schur_explicit) { 6384 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6385 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6386 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6387 } else { 6388 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6389 PetscBool isseqaij,need_change = PETSC_FALSE;; 6390 PetscInt benign_n; 6391 Mat change = NULL; 6392 Vec scaling = NULL; 6393 IS change_primal = NULL; 6394 6395 if (!pcbddc->use_vertices && reuse_solvers) { 6396 PetscInt n_vertices; 6397 6398 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6399 reuse_solvers = (PetscBool)!n_vertices; 6400 } 6401 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6402 if (!isseqaij) { 6403 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6404 if (matis->A == pcbddc->local_mat) { 6405 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6406 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6407 } else { 6408 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6409 } 6410 } 6411 if (!pcbddc->benign_change_explicit) { 6412 benign_n = pcbddc->benign_n; 6413 } else { 6414 benign_n = 0; 6415 } 6416 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6417 We need a global reduction to avoid possible deadlocks. 6418 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6419 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6420 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6421 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6422 need_change = (PetscBool)(!need_change); 6423 } 6424 /* If the user defines additional constraints, we import them here. 6425 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6426 if (need_change) { 6427 PC_IS *pcisf; 6428 PC_BDDC *pcbddcf; 6429 PC pcf; 6430 6431 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6432 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6433 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6434 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6435 /* hacks */ 6436 pcisf = (PC_IS*)pcf->data; 6437 pcisf->is_B_local = pcis->is_B_local; 6438 pcisf->vec1_N = pcis->vec1_N; 6439 pcisf->BtoNmap = pcis->BtoNmap; 6440 pcisf->n = pcis->n; 6441 pcisf->n_B = pcis->n_B; 6442 pcbddcf = (PC_BDDC*)pcf->data; 6443 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6444 pcbddcf->mat_graph = pcbddc->mat_graph; 6445 pcbddcf->use_faces = PETSC_TRUE; 6446 pcbddcf->use_change_of_basis = PETSC_TRUE; 6447 pcbddcf->use_change_on_faces = PETSC_TRUE; 6448 pcbddcf->use_qr_single = PETSC_TRUE; 6449 pcbddcf->fake_change = PETSC_TRUE; 6450 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6451 /* store information on primal vertices and change of basis (in local numbering) */ 6452 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6453 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6454 change = pcbddcf->ConstraintMatrix; 6455 pcbddcf->ConstraintMatrix = NULL; 6456 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6457 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6458 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6459 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6460 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6461 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6462 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6463 pcf->ops->destroy = NULL; 6464 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6465 } 6466 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6467 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6468 ierr = MatDestroy(&change);CHKERRQ(ierr); 6469 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6470 } 6471 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6472 6473 /* free adjacency */ 6474 if (free_used_adj) { 6475 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6476 } 6477 PetscFunctionReturn(0); 6478 } 6479 6480 #undef __FUNCT__ 6481 #define __FUNCT__ "PCBDDCInitSubSchurs" 6482 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6483 { 6484 PC_IS *pcis=(PC_IS*)pc->data; 6485 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6486 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6487 PCBDDCGraph graph; 6488 PetscErrorCode ierr; 6489 6490 PetscFunctionBegin; 6491 /* attach interface graph for determining subsets */ 6492 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6493 IS verticesIS,verticescomm; 6494 PetscInt vsize,*idxs; 6495 6496 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6497 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6498 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6499 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6500 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6501 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6502 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6503 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6504 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6505 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6506 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6507 } else { 6508 graph = pcbddc->mat_graph; 6509 } 6510 /* print some info */ 6511 if (pcbddc->dbg_flag) { 6512 IS vertices; 6513 PetscInt nv,nedges,nfaces; 6514 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6515 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6516 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6517 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6518 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6524 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6525 } 6526 6527 /* sub_schurs init */ 6528 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6529 6530 /* free graph struct */ 6531 if (pcbddc->sub_schurs_rebuild) { 6532 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6533 } 6534 PetscFunctionReturn(0); 6535 } 6536 6537 #undef __FUNCT__ 6538 #define __FUNCT__ "PCBDDCCheckOperator" 6539 PetscErrorCode PCBDDCCheckOperator(PC pc) 6540 { 6541 PC_IS *pcis=(PC_IS*)pc->data; 6542 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6543 PetscErrorCode ierr; 6544 6545 PetscFunctionBegin; 6546 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6547 IS zerodiag = NULL; 6548 Mat S_j,B0_B=NULL; 6549 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6550 PetscScalar *p0_check,*array,*array2; 6551 PetscReal norm; 6552 PetscInt i; 6553 6554 /* B0 and B0_B */ 6555 if (zerodiag) { 6556 IS dummy; 6557 6558 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6559 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6560 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6561 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6562 } 6563 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6564 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6565 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6566 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6567 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6568 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6569 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6570 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6571 /* S_j */ 6572 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6573 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6574 6575 /* mimic vector in \widetilde{W}_\Gamma */ 6576 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6577 /* continuous in primal space */ 6578 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6579 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6580 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6581 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6582 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6583 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6584 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6585 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6586 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6587 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6588 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6589 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6590 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6591 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6592 6593 /* assemble rhs for coarse problem */ 6594 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6595 /* local with Schur */ 6596 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6597 if (zerodiag) { 6598 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6599 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6600 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6601 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6602 } 6603 /* sum on primal nodes the local contributions */ 6604 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6605 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6606 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6607 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6608 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6609 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6610 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6611 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6612 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6613 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6614 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6615 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6616 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6617 /* scale primal nodes (BDDC sums contibutions) */ 6618 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6619 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6620 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6621 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6622 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6623 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6624 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6625 /* global: \widetilde{B0}_B w_\Gamma */ 6626 if (zerodiag) { 6627 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6628 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6629 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6630 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6631 } 6632 /* BDDC */ 6633 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6634 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6635 6636 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6637 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6638 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6639 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6640 for (i=0;i<pcbddc->benign_n;i++) { 6641 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6642 } 6643 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6644 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6645 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6646 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6647 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6648 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6649 } 6650 PetscFunctionReturn(0); 6651 } 6652