1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 36 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 37 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 38 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 39 } 40 } 41 } 42 43 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 44 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 45 } 46 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 51 } 52 ierr = VecDestroy(&global);CHKERRQ(ierr); 53 ierr = VecDestroy(&local);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 #undef __FUNCT__ 58 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 59 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 60 { 61 PCBDDCBenignMatMult_ctx ctx; 62 PetscErrorCode ierr; 63 PetscBool apply_right,apply_left,reset_x; 64 65 PetscFunctionBegin; 66 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 67 if (transpose) { 68 apply_right = ctx->apply_left; 69 apply_left = ctx->apply_right; 70 } else { 71 apply_right = ctx->apply_right; 72 apply_left = ctx->apply_left; 73 } 74 reset_x = PETSC_FALSE; 75 if (apply_right) { 76 const PetscScalar *ax; 77 PetscInt nl,i; 78 79 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 80 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 81 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 82 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 83 for (i=0;i<ctx->benign_n;i++) { 84 PetscScalar sum,val; 85 const PetscInt *idxs; 86 PetscInt nz,j; 87 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 88 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 89 sum = 0.; 90 if (ctx->apply_p0) { 91 val = ctx->work[idxs[nz-1]]; 92 for (j=0;j<nz-1;j++) { 93 sum += ctx->work[idxs[j]]; 94 ctx->work[idxs[j]] += val; 95 } 96 } else { 97 for (j=0;j<nz-1;j++) { 98 sum += ctx->work[idxs[j]]; 99 } 100 } 101 ctx->work[idxs[nz-1]] -= sum; 102 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 103 } 104 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 105 reset_x = PETSC_TRUE; 106 } 107 if (transpose) { 108 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 109 } else { 110 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 111 } 112 if (reset_x) { 113 ierr = VecResetArray(x);CHKERRQ(ierr); 114 } 115 if (apply_left) { 116 PetscScalar *ay; 117 PetscInt i; 118 119 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 120 for (i=0;i<ctx->benign_n;i++) { 121 PetscScalar sum,val; 122 const PetscInt *idxs; 123 PetscInt nz,j; 124 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 125 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 126 val = -ay[idxs[nz-1]]; 127 if (ctx->apply_p0) { 128 sum = 0.; 129 for (j=0;j<nz-1;j++) { 130 sum += ay[idxs[j]]; 131 ay[idxs[j]] += val; 132 } 133 ay[idxs[nz-1]] += sum; 134 } else { 135 for (j=0;j<nz-1;j++) { 136 ay[idxs[j]] += val; 137 } 138 ay[idxs[nz-1]] = 0.; 139 } 140 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 141 } 142 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 143 } 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 149 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 150 { 151 PetscErrorCode ierr; 152 153 PetscFunctionBegin; 154 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 160 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 161 { 162 PetscErrorCode ierr; 163 164 PetscFunctionBegin; 165 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "PCBDDCBenignShellMat" 171 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 172 { 173 PC_IS *pcis = (PC_IS*)pc->data; 174 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 175 PCBDDCBenignMatMult_ctx ctx; 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 if (!restore) { 180 Mat A_IB,A_BI; 181 PetscScalar *work; 182 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 183 184 if (pcbddc->benign_original_mat) { 185 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 186 } 187 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 188 PetscFunctionReturn(0); 189 } 190 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 192 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 193 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 194 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 195 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 196 ierr = PetscNew(&ctx);CHKERRQ(ierr); 197 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 198 ctx->apply_left = PETSC_TRUE; 199 ctx->apply_right = PETSC_FALSE; 200 ctx->apply_p0 = PETSC_FALSE; 201 ctx->benign_n = pcbddc->benign_n; 202 if (reuse) { 203 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 204 ctx->free = PETSC_FALSE; 205 } else { /* TODO: could be optimized for successive solves */ 206 ISLocalToGlobalMapping N_to_D; 207 PetscInt i; 208 209 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 210 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 211 for (i=0;i<pcbddc->benign_n;i++) { 212 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 213 } 214 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 215 ctx->free = PETSC_TRUE; 216 } 217 ctx->A = pcis->A_IB; 218 ctx->work = work; 219 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 220 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 221 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 222 pcis->A_IB = A_IB; 223 224 /* A_BI as A_IB^T */ 225 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 226 pcbddc->benign_original_mat = pcis->A_BI; 227 pcis->A_BI = A_BI; 228 } else { 229 if (!pcbddc->benign_original_mat) { 230 PetscFunctionReturn(0); 231 } 232 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 234 pcis->A_IB = ctx->A; 235 ctx->A = NULL; 236 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 237 pcis->A_BI = pcbddc->benign_original_mat; 238 pcbddc->benign_original_mat = NULL; 239 if (ctx->free) { 240 PetscInt i; 241 for (i=0;i<ctx->benign_n;i++) { 242 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 243 } 244 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 245 } 246 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 247 ierr = PetscFree(ctx);CHKERRQ(ierr); 248 } 249 PetscFunctionReturn(0); 250 } 251 252 /* used just in bddc debug mode */ 253 #undef __FUNCT__ 254 #define __FUNCT__ "PCBDDCBenignProject" 255 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 256 { 257 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 258 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 259 Mat An; 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 264 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 265 if (is1) { 266 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 267 ierr = MatDestroy(&An);CHKERRQ(ierr); 268 } else { 269 *B = An; 270 } 271 PetscFunctionReturn(0); 272 } 273 274 /* TODO: add reuse flag */ 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSeqAIJCompress" 277 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 278 { 279 Mat Bt; 280 PetscScalar *a,*bdata; 281 const PetscInt *ii,*ij; 282 PetscInt m,n,i,nnz,*bii,*bij; 283 PetscBool flg_row; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 288 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 289 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 290 nnz = n; 291 for (i=0;i<ii[n];i++) { 292 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 293 } 294 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 295 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 296 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 297 nnz = 0; 298 bii[0] = 0; 299 for (i=0;i<n;i++) { 300 PetscInt j; 301 for (j=ii[i];j<ii[i+1];j++) { 302 PetscScalar entry = a[j]; 303 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 304 bij[nnz] = ij[j]; 305 bdata[nnz] = entry; 306 nnz++; 307 } 308 } 309 bii[i+1] = nnz; 310 } 311 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 312 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 313 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 314 { 315 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 316 b->free_a = PETSC_TRUE; 317 b->free_ij = PETSC_TRUE; 318 } 319 *B = Bt; 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatDetectDisconnectedComponents" 325 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 326 { 327 Mat B; 328 IS is_dummy,*cc_n; 329 ISLocalToGlobalMapping l2gmap_dummy; 330 PCBDDCGraph graph; 331 PetscInt i,n; 332 PetscInt *xadj,*adjncy; 333 PetscInt *xadj_filtered,*adjncy_filtered; 334 PetscBool flg_row,isseqaij; 335 PetscErrorCode ierr; 336 337 PetscFunctionBegin; 338 if (!A->rmap->N || !A->cmap->N) { 339 *ncc = 0; 340 *cc = NULL; 341 PetscFunctionReturn(0); 342 } 343 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 344 if (!isseqaij && filter) { 345 PetscBool isseqdense; 346 347 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 348 if (!isseqdense) { 349 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 350 } else { /* TODO: rectangular case and LDA */ 351 PetscScalar *array; 352 PetscReal chop=1.e-6; 353 354 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 355 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 356 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 357 for (i=0;i<n;i++) { 358 PetscInt j; 359 for (j=i+1;j<n;j++) { 360 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 361 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 362 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 363 } 364 } 365 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 366 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 367 } 368 } else { 369 B = A; 370 } 371 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 372 373 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 374 if (filter) { 375 PetscScalar *data; 376 PetscInt j,cum; 377 378 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 379 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 380 cum = 0; 381 for (i=0;i<n;i++) { 382 PetscInt t; 383 384 for (j=xadj[i];j<xadj[i+1];j++) { 385 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 386 continue; 387 } 388 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 389 } 390 t = xadj_filtered[i]; 391 xadj_filtered[i] = cum; 392 cum += t; 393 } 394 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 395 } else { 396 xadj_filtered = NULL; 397 adjncy_filtered = NULL; 398 } 399 400 /* compute local connected components using PCBDDCGraph */ 401 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 402 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 403 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 404 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 405 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 406 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 407 if (xadj_filtered) { 408 graph->xadj = xadj_filtered; 409 graph->adjncy = adjncy_filtered; 410 } else { 411 graph->xadj = xadj; 412 graph->adjncy = adjncy; 413 } 414 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 415 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 416 /* partial clean up */ 417 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 418 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 419 if (A != B) { 420 ierr = MatDestroy(&B);CHKERRQ(ierr); 421 } 422 423 /* get back data */ 424 if (ncc) *ncc = graph->ncc; 425 if (cc) { 426 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 427 for (i=0;i<graph->ncc;i++) { 428 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 429 } 430 *cc = cc_n; 431 } 432 /* clean up graph */ 433 graph->xadj = 0; 434 graph->adjncy = 0; 435 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "PCBDDCBenignCheck" 441 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 442 { 443 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 444 PC_IS* pcis = (PC_IS*)(pc->data); 445 IS dirIS = NULL; 446 PetscInt i; 447 PetscErrorCode ierr; 448 449 PetscFunctionBegin; 450 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 451 if (zerodiag) { 452 Mat A; 453 Vec vec3_N; 454 PetscScalar *vals; 455 const PetscInt *idxs; 456 PetscInt nz,*count; 457 458 /* p0 */ 459 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 460 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 461 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 462 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 463 for (i=0;i<nz;i++) vals[i] = 1.; 464 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 465 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 466 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 467 /* v_I */ 468 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 469 for (i=0;i<nz;i++) vals[i] = 0.; 470 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 471 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 473 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 474 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 476 if (dirIS) { 477 PetscInt n; 478 479 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 480 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 481 for (i=0;i<n;i++) vals[i] = 0.; 482 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 483 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 484 } 485 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 486 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 487 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 488 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 489 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 490 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 491 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 492 if (PetscAbsScalar(vals[0]) > 1.e-1) { 493 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 494 } 495 ierr = PetscFree(vals);CHKERRQ(ierr); 496 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 497 498 /* there should not be any pressure dofs lying on the interface */ 499 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 500 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 501 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 502 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 503 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 504 for (i=0;i<nz;i++) { 505 if (count[idxs[i]]) { 506 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 507 } 508 } 509 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 510 ierr = PetscFree(count);CHKERRQ(ierr); 511 } 512 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 513 514 /* check PCBDDCBenignGetOrSetP0 */ 515 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 516 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 517 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 518 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 519 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 520 for (i=0;i<pcbddc->benign_n;i++) { 521 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 522 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 523 } 524 } 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 530 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 531 { 532 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 533 IS pressures,zerodiag,*zerodiag_subs; 534 PetscInt nz,n; 535 PetscInt *interior_dofs,n_interior_dofs; 536 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 537 PetscErrorCode ierr; 538 539 PetscFunctionBegin; 540 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 541 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 542 for (n=0;n<pcbddc->benign_n;n++) { 543 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 544 } 545 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 546 pcbddc->benign_n = 0; 547 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 548 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 549 Checks if all the pressure dofs in each subdomain have a zero diagonal 550 If not, a change of basis on pressures is not needed 551 since the local Schur complements are SPD 552 */ 553 has_null_pressures = PETSC_TRUE; 554 have_null = PETSC_TRUE; 555 if (pcbddc->n_ISForDofsLocal) { 556 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 557 558 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 559 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 560 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 561 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 563 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 564 if (!sorted) { 565 ierr = ISSort(pressures);CHKERRQ(ierr); 566 } 567 } else { 568 pressures = NULL; 569 } 570 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 571 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 572 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 573 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 574 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 575 if (!sorted) { 576 ierr = ISSort(zerodiag);CHKERRQ(ierr); 577 } 578 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 579 if (!nz) { 580 if (n) have_null = PETSC_FALSE; 581 has_null_pressures = PETSC_FALSE; 582 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 583 } 584 recompute_zerodiag = PETSC_FALSE; 585 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 586 zerodiag_subs = NULL; 587 pcbddc->benign_n = 0; 588 n_interior_dofs = 0; 589 interior_dofs = NULL; 590 if (pcbddc->current_level) { /* need to compute interior nodes */ 591 PetscInt n,i,j; 592 PetscInt n_neigh,*neigh,*n_shared,**shared; 593 PetscInt *iwork; 594 595 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 596 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 597 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 598 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 599 for (i=0;i<n_neigh;i++) 600 for (j=0;j<n_shared[i];j++) 601 iwork[shared[i][j]] += 1; 602 for (i=0;i<n;i++) 603 if (!iwork[i]) 604 interior_dofs[n_interior_dofs++] = i; 605 ierr = PetscFree(iwork);CHKERRQ(ierr); 606 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 607 } 608 if (has_null_pressures) { 609 IS *subs; 610 PetscInt nsubs,i,j,nl; 611 const PetscInt *idxs; 612 PetscScalar *array; 613 Vec *work; 614 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 615 616 subs = pcbddc->local_subs; 617 nsubs = pcbddc->n_local_subs; 618 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 619 if (pcbddc->current_level) { 620 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 621 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 622 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 623 /* work[0] = 1_p */ 624 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 625 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 626 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 627 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 628 /* work[0] = 1_v */ 629 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 630 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 631 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 632 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 634 } 635 if (nsubs > 1) { 636 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 637 for (i=0;i<nsubs;i++) { 638 ISLocalToGlobalMapping l2g; 639 IS t_zerodiag_subs; 640 PetscInt nl; 641 642 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 643 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 644 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 645 if (nl) { 646 PetscBool valid = PETSC_TRUE; 647 648 if (pcbddc->current_level) { 649 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 650 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 651 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 652 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 653 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 654 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 655 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 656 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 657 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 658 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 659 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 660 for (j=0;j<n_interior_dofs;j++) { 661 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 662 valid = PETSC_FALSE; 663 break; 664 } 665 } 666 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 667 } 668 if (valid && pcbddc->NeumannBoundariesLocal) { 669 IS t_bc; 670 PetscInt nzb; 671 672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 673 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 674 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 675 if (nzb) valid = PETSC_FALSE; 676 } 677 if (valid && pressures) { 678 IS t_pressure_subs; 679 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 680 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 681 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 682 } 683 if (valid) { 684 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 685 pcbddc->benign_n++; 686 } else { 687 recompute_zerodiag = PETSC_TRUE; 688 } 689 } 690 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 691 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 692 } 693 } else { /* there's just one subdomain (or zero if they have not been detected */ 694 PetscBool valid = PETSC_TRUE; 695 696 if (pcbddc->NeumannBoundariesLocal) { 697 PetscInt nzb; 698 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 699 if (nzb) valid = PETSC_FALSE; 700 } 701 if (valid && pressures) { 702 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 703 } 704 if (valid && pcbddc->current_level) { 705 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 706 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 707 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 708 for (j=0;j<n_interior_dofs;j++) { 709 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 710 valid = PETSC_FALSE; 711 break; 712 } 713 } 714 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 715 } 716 if (valid) { 717 pcbddc->benign_n = 1; 718 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 719 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 720 zerodiag_subs[0] = zerodiag; 721 } 722 } 723 if (pcbddc->current_level) { 724 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 728 729 if (!pcbddc->benign_n) { 730 PetscInt n; 731 732 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 733 recompute_zerodiag = PETSC_FALSE; 734 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 735 if (n) { 736 has_null_pressures = PETSC_FALSE; 737 have_null = PETSC_FALSE; 738 } 739 } 740 741 /* final check for null pressures */ 742 if (zerodiag && pressures) { 743 PetscInt nz,np; 744 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 745 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 746 if (nz != np) have_null = PETSC_FALSE; 747 } 748 749 if (recompute_zerodiag) { 750 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 751 if (pcbddc->benign_n == 1) { 752 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 753 zerodiag = zerodiag_subs[0]; 754 } else { 755 PetscInt i,nzn,*new_idxs; 756 757 nzn = 0; 758 for (i=0;i<pcbddc->benign_n;i++) { 759 PetscInt ns; 760 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 761 nzn += ns; 762 } 763 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 764 nzn = 0; 765 for (i=0;i<pcbddc->benign_n;i++) { 766 PetscInt ns,*idxs; 767 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 768 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 769 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 770 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 771 nzn += ns; 772 } 773 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 774 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 775 } 776 have_null = PETSC_FALSE; 777 } 778 779 /* no-net-flux */ 780 if (pcbddc->benign_compute_nonetflux) { 781 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 782 MatNullSpace near_null_space; 783 ISLocalToGlobalMapping rmap; 784 Vec quad_vec; 785 PetscScalar *pvals; 786 PetscInt i,np,*dummyins; 787 IS isused = NULL; 788 PetscBool participate = PETSC_TRUE; 789 790 /* create vector to hold quadrature weights */ 791 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 792 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 793 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 794 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 795 796 /* compute B^{(i)T} * 1_p */ 797 np = 0; 798 if (pressures) { 799 isused = pressures; 800 } else { 801 isused = zerodiag; 802 } 803 if (isused) { 804 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 805 } 806 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 807 for (i=0;i<np;i++) pvals[i] = 1.; 808 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 809 if (np) { 810 const PetscInt *pidxs; 811 812 if (isused) { 813 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 814 } 815 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 816 if (isused) { 817 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 818 } 819 } 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 823 ierr = PetscFree(pvals);CHKERRQ(ierr); 824 if (!isused) participate = PETSC_FALSE; 825 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 826 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 827 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 828 for (i=0;i<matis->sf->nleaves;i++) 829 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 830 else matis->sf_leafdata[i] = -1; 831 832 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 833 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 834 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 835 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 836 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 837 for (i=0;i<matis->sf->nleaves;i++) { 838 if (PetscGlobalRank != matis->sf_leafdata[i]) { 839 pvals[i] = 0.; 840 } 841 } 842 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 843 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 844 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 845 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 846 ierr = PetscFree(dummyins);CHKERRQ(ierr); 847 848 /* assembly quadrature vec and attach near null space to pmat */ 849 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 850 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 851 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 852 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 853 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 854 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 855 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 856 } 857 858 /* change of basis and p0 dofs */ 859 if (has_null_pressures) { 860 IS zerodiagc; 861 const PetscInt *idxs,*idxsc; 862 PetscInt i,s,*nnz; 863 864 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 865 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 866 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 867 /* local change of basis for pressures */ 868 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 869 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 870 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 871 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 872 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 873 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 874 for (i=0;i<pcbddc->benign_n;i++) { 875 PetscInt nzs,j; 876 877 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 878 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 879 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 880 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 881 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 882 } 883 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 884 ierr = PetscFree(nnz);CHKERRQ(ierr); 885 /* set identity on velocities */ 886 for (i=0;i<n-nz;i++) { 887 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 888 } 889 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 890 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 891 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 892 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 893 /* set change on pressures */ 894 for (s=0;s<pcbddc->benign_n;s++) { 895 PetscScalar *array; 896 PetscInt nzs; 897 898 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 899 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 900 for (i=0;i<nzs-1;i++) { 901 PetscScalar vals[2]; 902 PetscInt cols[2]; 903 904 cols[0] = idxs[i]; 905 cols[1] = idxs[nzs-1]; 906 vals[0] = 1.; 907 vals[1] = 1.; 908 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 909 } 910 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 911 for (i=0;i<nzs-1;i++) array[i] = -1.; 912 array[nzs-1] = 1.; 913 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 914 /* store local idxs for p0 */ 915 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 916 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 917 ierr = PetscFree(array);CHKERRQ(ierr); 918 } 919 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921 /* project if needed */ 922 if (pcbddc->benign_change_explicit) { 923 Mat M; 924 925 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 926 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 927 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 928 ierr = MatDestroy(&M);CHKERRQ(ierr); 929 } 930 /* store global idxs for p0 */ 931 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 932 } 933 pcbddc->benign_zerodiag_subs = zerodiag_subs; 934 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 935 936 /* determines if the coarse solver will be singular or not */ 937 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 938 /* determines if the problem has subdomains with 0 pressure block */ 939 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 940 *zerodiaglocal = zerodiag; 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 946 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 947 { 948 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 949 PetscScalar *array; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 if (!pcbddc->benign_sf) { 954 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 955 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 956 } 957 if (get) { 958 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 959 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 960 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 962 } else { 963 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 964 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 965 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 966 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 973 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 974 { 975 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 /* TODO: add error checking 980 - avoid nested pop (or push) calls. 981 - cannot push before pop. 982 - cannot call this if pcbddc->local_mat is NULL 983 */ 984 if (!pcbddc->benign_n) { 985 PetscFunctionReturn(0); 986 } 987 if (pop) { 988 if (pcbddc->benign_change_explicit) { 989 IS is_p0; 990 MatReuse reuse; 991 992 /* extract B_0 */ 993 reuse = MAT_INITIAL_MATRIX; 994 if (pcbddc->benign_B0) { 995 reuse = MAT_REUSE_MATRIX; 996 } 997 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 998 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 999 /* remove rows and cols from local problem */ 1000 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1001 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1002 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1003 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1004 } else { 1005 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1006 PetscScalar *vals; 1007 PetscInt i,n,*idxs_ins; 1008 1009 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1010 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1011 if (!pcbddc->benign_B0) { 1012 PetscInt *nnz; 1013 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1014 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1015 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1016 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1017 for (i=0;i<pcbddc->benign_n;i++) { 1018 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1019 nnz[i] = n - nnz[i]; 1020 } 1021 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1022 ierr = PetscFree(nnz);CHKERRQ(ierr); 1023 } 1024 1025 for (i=0;i<pcbddc->benign_n;i++) { 1026 PetscScalar *array; 1027 PetscInt *idxs,j,nz,cum; 1028 1029 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1030 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1031 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1032 for (j=0;j<nz;j++) vals[j] = 1.; 1033 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1034 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1035 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1036 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1037 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1038 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1039 cum = 0; 1040 for (j=0;j<n;j++) { 1041 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1042 vals[cum] = array[j]; 1043 idxs_ins[cum] = j; 1044 cum++; 1045 } 1046 } 1047 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1049 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1050 } 1051 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1054 } 1055 } else { /* push */ 1056 if (pcbddc->benign_change_explicit) { 1057 PetscInt i; 1058 1059 for (i=0;i<pcbddc->benign_n;i++) { 1060 PetscScalar *B0_vals; 1061 PetscInt *B0_cols,B0_ncol; 1062 1063 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1064 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1065 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1066 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1067 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1068 } 1069 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1071 } else { 1072 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1073 } 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1080 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1081 { 1082 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1083 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1084 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1085 PetscBLASInt *B_iwork,*B_ifail; 1086 PetscScalar *work,lwork; 1087 PetscScalar *St,*S,*eigv; 1088 PetscScalar *Sarray,*Starray; 1089 PetscReal *eigs,thresh; 1090 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1091 PetscBool allocated_S_St; 1092 #if defined(PETSC_USE_COMPLEX) 1093 PetscReal *rwork; 1094 #endif 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1099 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1100 1101 if (pcbddc->dbg_flag) { 1102 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1105 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1106 } 1107 1108 if (pcbddc->dbg_flag) { 1109 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1110 } 1111 1112 /* max size of subsets */ 1113 mss = 0; 1114 for (i=0;i<sub_schurs->n_subs;i++) { 1115 PetscInt subset_size; 1116 1117 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1118 mss = PetscMax(mss,subset_size); 1119 } 1120 1121 /* min/max and threshold */ 1122 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1123 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1124 nmax = PetscMax(nmin,nmax); 1125 allocated_S_St = PETSC_FALSE; 1126 if (nmin) { 1127 allocated_S_St = PETSC_TRUE; 1128 } 1129 1130 /* allocate lapack workspace */ 1131 cum = cum2 = 0; 1132 maxneigs = 0; 1133 for (i=0;i<sub_schurs->n_subs;i++) { 1134 PetscInt n,subset_size; 1135 1136 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1137 n = PetscMin(subset_size,nmax); 1138 cum += subset_size; 1139 cum2 += subset_size*n; 1140 maxneigs = PetscMax(maxneigs,n); 1141 } 1142 if (mss) { 1143 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1144 PetscBLASInt B_itype = 1; 1145 PetscBLASInt B_N = mss; 1146 PetscReal zero = 0.0; 1147 PetscReal eps = 0.0; /* dlamch? */ 1148 1149 B_lwork = -1; 1150 S = NULL; 1151 St = NULL; 1152 eigs = NULL; 1153 eigv = NULL; 1154 B_iwork = NULL; 1155 B_ifail = NULL; 1156 #if defined(PETSC_USE_COMPLEX) 1157 rwork = NULL; 1158 #endif 1159 thresh = 1.0; 1160 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1161 #if defined(PETSC_USE_COMPLEX) 1162 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1163 #else 1164 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1165 #endif 1166 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1167 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1168 } else { 1169 /* TODO */ 1170 } 1171 } else { 1172 lwork = 0; 1173 } 1174 1175 nv = 0; 1176 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1177 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1178 } 1179 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1180 if (allocated_S_St) { 1181 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1182 } 1183 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1184 #if defined(PETSC_USE_COMPLEX) 1185 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1186 #endif 1187 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1188 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1189 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1190 nv+cum,&pcbddc->adaptive_constraints_idxs, 1191 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1192 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1193 1194 maxneigs = 0; 1195 cum = cumarray = 0; 1196 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1197 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1198 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1199 const PetscInt *idxs; 1200 1201 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1202 for (cum=0;cum<nv;cum++) { 1203 pcbddc->adaptive_constraints_n[cum] = 1; 1204 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1205 pcbddc->adaptive_constraints_data[cum] = 1.0; 1206 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1207 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1208 } 1209 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1210 } 1211 1212 if (mss) { /* multilevel */ 1213 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1214 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1215 } 1216 1217 thresh = pcbddc->adaptive_threshold; 1218 for (i=0;i<sub_schurs->n_subs;i++) { 1219 const PetscInt *idxs; 1220 PetscReal upper,lower; 1221 PetscInt j,subset_size,eigs_start = 0; 1222 PetscBLASInt B_N; 1223 PetscBool same_data = PETSC_FALSE; 1224 1225 if (pcbddc->use_deluxe_scaling) { 1226 upper = PETSC_MAX_REAL; 1227 lower = thresh; 1228 } else { 1229 upper = 1./thresh; 1230 lower = 0.; 1231 } 1232 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1233 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1234 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1235 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1236 if (sub_schurs->is_hermitian) { 1237 PetscInt j,k; 1238 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1239 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1240 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1241 } 1242 for (j=0;j<subset_size;j++) { 1243 for (k=j;k<subset_size;k++) { 1244 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1245 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1246 } 1247 } 1248 } else { 1249 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1250 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1251 } 1252 } else { 1253 S = Sarray + cumarray; 1254 St = Starray + cumarray; 1255 } 1256 /* see if we can save some work */ 1257 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1258 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1259 } 1260 1261 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1262 B_neigs = 0; 1263 } else { 1264 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1265 PetscBLASInt B_itype = 1; 1266 PetscBLASInt B_IL, B_IU; 1267 PetscReal eps = -1.0; /* dlamch? */ 1268 PetscInt nmin_s; 1269 PetscBool compute_range = PETSC_FALSE; 1270 1271 if (pcbddc->dbg_flag) { 1272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1273 } 1274 1275 compute_range = PETSC_FALSE; 1276 if (thresh > 1.+PETSC_SMALL && !same_data) { 1277 compute_range = PETSC_TRUE; 1278 } 1279 1280 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1281 if (compute_range) { 1282 1283 /* ask for eigenvalues larger than thresh */ 1284 #if defined(PETSC_USE_COMPLEX) 1285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1286 #else 1287 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1288 #endif 1289 } else if (!same_data) { 1290 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1291 B_IL = 1; 1292 #if defined(PETSC_USE_COMPLEX) 1293 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1294 #else 1295 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1296 #endif 1297 } else { /* same_data is true, so get the adaptive function requested by the user */ 1298 PetscInt k; 1299 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1300 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1301 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1302 nmin = nmax; 1303 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1304 for (k=0;k<nmax;k++) { 1305 eigs[k] = 1./PETSC_SMALL; 1306 eigv[k*(subset_size+1)] = 1.0; 1307 } 1308 } 1309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1310 if (B_ierr) { 1311 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1312 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1313 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1314 } 1315 1316 if (B_neigs > nmax) { 1317 if (pcbddc->dbg_flag) { 1318 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1319 } 1320 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1321 B_neigs = nmax; 1322 } 1323 1324 nmin_s = PetscMin(nmin,B_N); 1325 if (B_neigs < nmin_s) { 1326 PetscBLASInt B_neigs2; 1327 1328 if (pcbddc->use_deluxe_scaling) { 1329 B_IL = B_N - nmin_s + 1; 1330 B_IU = B_N - B_neigs; 1331 } else { 1332 B_IL = B_neigs + 1; 1333 B_IU = nmin_s; 1334 } 1335 if (pcbddc->dbg_flag) { 1336 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1337 } 1338 if (sub_schurs->is_hermitian) { 1339 PetscInt j; 1340 for (j=0;j<subset_size;j++) { 1341 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1342 } 1343 for (j=0;j<subset_size;j++) { 1344 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1345 } 1346 } else { 1347 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1349 } 1350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1351 #if defined(PETSC_USE_COMPLEX) 1352 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1353 #else 1354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1355 #endif 1356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1357 B_neigs += B_neigs2; 1358 } 1359 if (B_ierr) { 1360 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1361 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1362 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1363 } 1364 if (pcbddc->dbg_flag) { 1365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1366 for (j=0;j<B_neigs;j++) { 1367 if (eigs[j] == 0.0) { 1368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1369 } else { 1370 if (pcbddc->use_deluxe_scaling) { 1371 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1372 } else { 1373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1374 } 1375 } 1376 } 1377 } 1378 } else { 1379 /* TODO */ 1380 } 1381 } 1382 /* change the basis back to the original one */ 1383 if (sub_schurs->change) { 1384 Mat change,phi,phit; 1385 1386 if (pcbddc->dbg_flag > 1) { 1387 PetscInt ii; 1388 for (ii=0;ii<B_neigs;ii++) { 1389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1390 for (j=0;j<B_N;j++) { 1391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1392 } 1393 } 1394 } 1395 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1397 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1398 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1399 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1400 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1401 } 1402 maxneigs = PetscMax(B_neigs,maxneigs); 1403 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1404 if (B_neigs) { 1405 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1406 1407 if (pcbddc->dbg_flag > 1) { 1408 PetscInt ii; 1409 for (ii=0;ii<B_neigs;ii++) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1411 for (j=0;j<B_N;j++) { 1412 #if defined(PETSC_USE_COMPLEX) 1413 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1414 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1416 #else 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1418 #endif 1419 } 1420 } 1421 } 1422 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1423 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1424 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1425 cum++; 1426 } 1427 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1428 /* shift for next computation */ 1429 cumarray += subset_size*subset_size; 1430 } 1431 if (pcbddc->dbg_flag) { 1432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1433 } 1434 1435 if (mss) { 1436 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1437 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1438 /* destroy matrices (junk) */ 1439 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1440 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1441 } 1442 if (allocated_S_St) { 1443 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1444 } 1445 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1446 #if defined(PETSC_USE_COMPLEX) 1447 ierr = PetscFree(rwork);CHKERRQ(ierr); 1448 #endif 1449 if (pcbddc->dbg_flag) { 1450 PetscInt maxneigs_r; 1451 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "PCBDDCSetUpSolvers" 1459 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1460 { 1461 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1462 PetscScalar *coarse_submat_vals; 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1467 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1468 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1469 1470 /* Setup local neumann solver ksp_R */ 1471 /* PCBDDCSetUpLocalScatters should be called first! */ 1472 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1473 1474 /* Change global null space passed in by the user if change of basis has been requested */ 1475 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1476 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1477 } 1478 1479 /* 1480 Setup local correction and local part of coarse basis. 1481 Gives back the dense local part of the coarse matrix in column major ordering 1482 */ 1483 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1484 1485 /* Compute total number of coarse nodes and setup coarse solver */ 1486 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1487 1488 /* free */ 1489 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "PCBDDCResetCustomization" 1495 PetscErrorCode PCBDDCResetCustomization(PC pc) 1496 { 1497 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1502 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1503 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1504 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1505 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1506 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1507 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1508 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1509 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1510 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1511 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1512 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCResetTopography" 1518 PetscErrorCode PCBDDCResetTopography(PC pc) 1519 { 1520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1521 PetscInt i; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1526 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1527 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1528 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1529 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1530 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1531 for (i=0;i<pcbddc->n_local_subs;i++) { 1532 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1533 } 1534 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1535 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "PCBDDCResetSolvers" 1541 PetscErrorCode PCBDDCResetSolvers(PC pc) 1542 { 1543 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1548 if (pcbddc->coarse_phi_B) { 1549 PetscScalar *array; 1550 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1551 ierr = PetscFree(array);CHKERRQ(ierr); 1552 } 1553 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1554 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1555 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1556 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1557 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1558 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1559 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1560 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1561 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1562 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1563 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1564 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1565 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1566 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1567 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1568 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1569 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1570 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1571 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1572 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1573 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1574 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1575 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1576 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1577 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1578 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1579 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1580 if (pcbddc->benign_zerodiag_subs) { 1581 PetscInt i; 1582 for (i=0;i<pcbddc->benign_n;i++) { 1583 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1584 } 1585 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1586 } 1587 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1593 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1594 { 1595 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1596 PC_IS *pcis = (PC_IS*)pc->data; 1597 VecType impVecType; 1598 PetscInt n_constraints,n_R,old_size; 1599 PetscErrorCode ierr; 1600 1601 PetscFunctionBegin; 1602 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1603 /* get sizes */ 1604 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1605 n_R = pcis->n - pcbddc->n_vertices; 1606 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1607 /* local work vectors (try to avoid unneeded work)*/ 1608 /* R nodes */ 1609 old_size = -1; 1610 if (pcbddc->vec1_R) { 1611 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1612 } 1613 if (n_R != old_size) { 1614 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1615 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1616 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1617 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1618 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1619 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1620 } 1621 /* local primal dofs */ 1622 old_size = -1; 1623 if (pcbddc->vec1_P) { 1624 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1625 } 1626 if (pcbddc->local_primal_size != old_size) { 1627 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1628 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1629 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1630 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1631 } 1632 /* local explicit constraints */ 1633 old_size = -1; 1634 if (pcbddc->vec1_C) { 1635 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1636 } 1637 if (n_constraints && n_constraints != old_size) { 1638 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1639 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1640 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1641 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "PCBDDCSetUpCorrection" 1648 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1649 { 1650 PetscErrorCode ierr; 1651 /* pointers to pcis and pcbddc */ 1652 PC_IS* pcis = (PC_IS*)pc->data; 1653 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1654 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1655 /* submatrices of local problem */ 1656 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1657 /* submatrices of local coarse problem */ 1658 Mat S_VV,S_CV,S_VC,S_CC; 1659 /* working matrices */ 1660 Mat C_CR; 1661 /* additional working stuff */ 1662 PC pc_R; 1663 Mat F; 1664 Vec dummy_vec; 1665 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1666 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1667 PetscScalar *work; 1668 PetscInt *idx_V_B; 1669 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1670 PetscInt i,n_R,n_D,n_B; 1671 1672 /* some shortcuts to scalars */ 1673 PetscScalar one=1.0,m_one=-1.0; 1674 1675 PetscFunctionBegin; 1676 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1677 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1678 } 1679 1680 /* Set Non-overlapping dimensions */ 1681 n_vertices = pcbddc->n_vertices; 1682 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1683 n_B = pcis->n_B; 1684 n_D = pcis->n - n_B; 1685 n_R = pcis->n - n_vertices; 1686 1687 /* vertices in boundary numbering */ 1688 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1689 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1690 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1691 1692 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1693 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1694 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1695 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1696 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1697 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1699 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1700 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1701 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1702 1703 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1704 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1705 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1706 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1707 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1708 lda_rhs = n_R; 1709 need_benign_correction = PETSC_FALSE; 1710 if (isLU || isILU || isCHOL) { 1711 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1712 } else if (sub_schurs->reuse_solver) { 1713 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1714 MatFactorType type; 1715 1716 F = reuse_solver->F; 1717 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1718 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1719 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1720 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1721 } else { 1722 F = NULL; 1723 } 1724 1725 /* allocate workspace */ 1726 n = 0; 1727 if (n_constraints) { 1728 n += lda_rhs*n_constraints; 1729 } 1730 if (n_vertices) { 1731 n = PetscMax(2*lda_rhs*n_vertices,n); 1732 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1733 } 1734 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1735 1736 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1737 dummy_vec = NULL; 1738 if (need_benign_correction && lda_rhs != n_R && F) { 1739 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1740 } 1741 1742 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1743 if (n_constraints) { 1744 Mat M1,M2,M3,C_B; 1745 IS is_aux; 1746 PetscScalar *array,*array2; 1747 1748 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1749 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1750 1751 /* Extract constraints on R nodes: C_{CR} */ 1752 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1753 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1754 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1755 1756 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1757 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1758 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1759 for (i=0;i<n_constraints;i++) { 1760 const PetscScalar *row_cmat_values; 1761 const PetscInt *row_cmat_indices; 1762 PetscInt size_of_constraint,j; 1763 1764 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1765 for (j=0;j<size_of_constraint;j++) { 1766 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1767 } 1768 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1769 } 1770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1771 if (F) { 1772 Mat B; 1773 1774 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1775 if (need_benign_correction) { 1776 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1777 1778 /* rhs is already zero on interior dofs, no need to change the rhs */ 1779 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1780 } 1781 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1782 if (need_benign_correction) { 1783 PetscScalar *marr; 1784 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1785 1786 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1787 if (lda_rhs != n_R) { 1788 for (i=0;i<n_constraints;i++) { 1789 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1790 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1791 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1792 } 1793 } else { 1794 for (i=0;i<n_constraints;i++) { 1795 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1796 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1797 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1798 } 1799 } 1800 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1801 } 1802 ierr = MatDestroy(&B);CHKERRQ(ierr); 1803 } else { 1804 PetscScalar *marr; 1805 1806 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1807 for (i=0;i<n_constraints;i++) { 1808 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1809 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1810 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1811 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1812 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1813 } 1814 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1815 } 1816 if (!pcbddc->switch_static) { 1817 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1818 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1819 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1820 for (i=0;i<n_constraints;i++) { 1821 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1822 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1823 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1824 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1825 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1827 } 1828 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1829 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1830 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1831 } else { 1832 if (lda_rhs != n_R) { 1833 IS dummy; 1834 1835 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1836 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1837 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1838 } else { 1839 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1840 pcbddc->local_auxmat2 = local_auxmat2_R; 1841 } 1842 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1843 } 1844 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1845 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1846 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1847 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1848 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1849 if (isCHOL) { 1850 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1851 } else { 1852 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1853 } 1854 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1855 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1856 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1857 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1858 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1859 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1860 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1861 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1862 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1863 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1864 } 1865 1866 /* Get submatrices from subdomain matrix */ 1867 if (n_vertices) { 1868 IS is_aux; 1869 1870 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1871 IS tis; 1872 1873 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1874 ierr = ISSort(tis);CHKERRQ(ierr); 1875 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1876 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1877 } else { 1878 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1879 } 1880 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1881 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1882 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1883 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1884 } 1885 1886 /* Matrix of coarse basis functions (local) */ 1887 if (pcbddc->coarse_phi_B) { 1888 PetscInt on_B,on_primal,on_D=n_D; 1889 if (pcbddc->coarse_phi_D) { 1890 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1891 } 1892 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1893 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1894 PetscScalar *marray; 1895 1896 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1897 ierr = PetscFree(marray);CHKERRQ(ierr); 1898 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1899 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1900 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1901 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1902 } 1903 } 1904 1905 if (!pcbddc->coarse_phi_B) { 1906 PetscScalar *marray; 1907 1908 n = n_B*pcbddc->local_primal_size; 1909 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1910 n += n_D*pcbddc->local_primal_size; 1911 } 1912 if (!pcbddc->symmetric_primal) { 1913 n *= 2; 1914 } 1915 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1917 n = n_B*pcbddc->local_primal_size; 1918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1919 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1920 n += n_D*pcbddc->local_primal_size; 1921 } 1922 if (!pcbddc->symmetric_primal) { 1923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1924 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1925 n = n_B*pcbddc->local_primal_size; 1926 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1927 } 1928 } else { 1929 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1930 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1931 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1932 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1933 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1934 } 1935 } 1936 } 1937 1938 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1939 p0_lidx_I = NULL; 1940 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1941 const PetscInt *idxs; 1942 1943 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1944 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1945 for (i=0;i<pcbddc->benign_n;i++) { 1946 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1947 } 1948 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1949 } 1950 1951 /* vertices */ 1952 if (n_vertices) { 1953 1954 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1955 1956 if (n_R) { 1957 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1958 PetscBLASInt B_N,B_one = 1; 1959 PetscScalar *x,*y; 1960 PetscBool isseqaij; 1961 1962 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1963 if (need_benign_correction) { 1964 ISLocalToGlobalMapping RtoN; 1965 IS is_p0; 1966 PetscInt *idxs_p0,n; 1967 1968 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1969 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1970 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1971 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1972 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1973 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1974 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1975 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1976 } 1977 1978 if (lda_rhs == n_R) { 1979 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1980 } else { 1981 PetscScalar *av,*array; 1982 const PetscInt *xadj,*adjncy; 1983 PetscInt n; 1984 PetscBool flg_row; 1985 1986 array = work+lda_rhs*n_vertices; 1987 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1988 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1989 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1990 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1991 for (i=0;i<n;i++) { 1992 PetscInt j; 1993 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1994 } 1995 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1996 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1997 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1998 } 1999 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2000 if (need_benign_correction) { 2001 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2002 PetscScalar *marr; 2003 2004 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2005 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2006 2007 | 0 0 0 | (V) 2008 L = | 0 0 -1 | (P-p0) 2009 | 0 0 -1 | (p0) 2010 2011 */ 2012 for (i=0;i<reuse_solver->benign_n;i++) { 2013 const PetscScalar *vals; 2014 const PetscInt *idxs,*idxs_zero; 2015 PetscInt n,j,nz; 2016 2017 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2018 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2019 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2020 for (j=0;j<n;j++) { 2021 PetscScalar val = vals[j]; 2022 PetscInt k,col = idxs[j]; 2023 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2024 } 2025 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2027 } 2028 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2029 } 2030 if (F) { 2031 /* need to correct the rhs */ 2032 if (need_benign_correction) { 2033 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2034 PetscScalar *marr; 2035 2036 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2037 if (lda_rhs != n_R) { 2038 for (i=0;i<n_vertices;i++) { 2039 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2040 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2041 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2042 } 2043 } else { 2044 for (i=0;i<n_vertices;i++) { 2045 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2046 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2047 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2048 } 2049 } 2050 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2051 } 2052 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2053 /* need to correct the solution */ 2054 if (need_benign_correction) { 2055 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2056 PetscScalar *marr; 2057 2058 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2059 if (lda_rhs != n_R) { 2060 for (i=0;i<n_vertices;i++) { 2061 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2062 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2063 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2064 } 2065 } else { 2066 for (i=0;i<n_vertices;i++) { 2067 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2068 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2069 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2070 } 2071 } 2072 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2073 } 2074 } else { 2075 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2076 for (i=0;i<n_vertices;i++) { 2077 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2078 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2079 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2080 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2081 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2082 } 2083 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2084 } 2085 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2086 /* S_VV and S_CV */ 2087 if (n_constraints) { 2088 Mat B; 2089 2090 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2091 for (i=0;i<n_vertices;i++) { 2092 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2093 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2094 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2097 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2098 } 2099 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2100 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2101 ierr = MatDestroy(&B);CHKERRQ(ierr); 2102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2103 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2104 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2105 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2106 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2107 ierr = MatDestroy(&B);CHKERRQ(ierr); 2108 } 2109 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2110 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2111 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2112 } 2113 if (lda_rhs != n_R) { 2114 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2115 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2116 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2117 } 2118 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2119 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2120 if (need_benign_correction) { 2121 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2122 PetscScalar *marr,*sums; 2123 2124 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2125 ierr = MatDenseGetArray(S_VVt,&marr); 2126 for (i=0;i<reuse_solver->benign_n;i++) { 2127 const PetscScalar *vals; 2128 const PetscInt *idxs,*idxs_zero; 2129 PetscInt n,j,nz; 2130 2131 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2132 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2133 for (j=0;j<n_vertices;j++) { 2134 PetscInt k; 2135 sums[j] = 0.; 2136 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2137 } 2138 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2139 for (j=0;j<n;j++) { 2140 PetscScalar val = vals[j]; 2141 PetscInt k; 2142 for (k=0;k<n_vertices;k++) { 2143 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2144 } 2145 } 2146 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2147 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2148 } 2149 ierr = PetscFree(sums);CHKERRQ(ierr); 2150 ierr = MatDenseRestoreArray(S_VVt,&marr); 2151 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2152 } 2153 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2155 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2156 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2157 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2158 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2159 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2160 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2161 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2164 } 2165 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2166 2167 /* coarse basis functions */ 2168 for (i=0;i<n_vertices;i++) { 2169 PetscScalar *y; 2170 2171 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2172 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2173 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2174 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2175 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2176 y[n_B*i+idx_V_B[i]] = 1.0; 2177 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2179 2180 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2181 PetscInt j; 2182 2183 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2185 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2186 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2188 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2189 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2190 } 2191 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2192 } 2193 /* if n_R == 0 the object is not destroyed */ 2194 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2195 } 2196 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2197 2198 if (n_constraints) { 2199 Mat B; 2200 2201 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2202 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2203 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2204 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2205 if (n_vertices) { 2206 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2207 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2208 } else { 2209 Mat S_VCt; 2210 2211 if (lda_rhs != n_R) { 2212 ierr = MatDestroy(&B);CHKERRQ(ierr); 2213 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2214 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2215 } 2216 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2217 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2218 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2219 } 2220 } 2221 ierr = MatDestroy(&B);CHKERRQ(ierr); 2222 /* coarse basis functions */ 2223 for (i=0;i<n_constraints;i++) { 2224 PetscScalar *y; 2225 2226 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2227 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2228 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2229 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2230 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2231 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2232 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2233 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2234 PetscInt j; 2235 2236 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2237 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2238 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2239 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2240 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2241 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2242 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2243 } 2244 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2245 } 2246 } 2247 if (n_constraints) { 2248 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2249 } 2250 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2251 2252 /* coarse matrix entries relative to B_0 */ 2253 if (pcbddc->benign_n) { 2254 Mat B0_B,B0_BPHI; 2255 IS is_dummy; 2256 PetscScalar *data; 2257 PetscInt j; 2258 2259 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2260 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2261 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2262 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2263 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2264 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2265 for (j=0;j<pcbddc->benign_n;j++) { 2266 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2267 for (i=0;i<pcbddc->local_primal_size;i++) { 2268 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2269 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2270 } 2271 } 2272 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2273 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2274 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2275 } 2276 2277 /* compute other basis functions for non-symmetric problems */ 2278 if (!pcbddc->symmetric_primal) { 2279 Mat B_V=NULL,B_C=NULL; 2280 PetscScalar *marray; 2281 2282 if (n_constraints) { 2283 Mat S_CCT,C_CRT; 2284 2285 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2286 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2287 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2288 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2289 if (n_vertices) { 2290 Mat S_VCT; 2291 2292 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2293 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2294 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2295 } 2296 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2297 } 2298 if (n_vertices && n_R) { 2299 PetscScalar *av,*marray; 2300 const PetscInt *xadj,*adjncy; 2301 PetscInt n; 2302 PetscBool flg_row; 2303 2304 /* B_V = B_V - A_VR^T */ 2305 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2306 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2307 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2308 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2309 for (i=0;i<n;i++) { 2310 PetscInt j; 2311 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2312 } 2313 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2314 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2315 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2316 } 2317 2318 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2319 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2320 for (i=0;i<n_vertices;i++) { 2321 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2322 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2323 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2324 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2325 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2326 } 2327 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2328 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2329 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2330 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2331 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2332 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2333 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2334 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2335 } 2336 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2337 /* coarse basis functions */ 2338 for (i=0;i<pcbddc->local_primal_size;i++) { 2339 PetscScalar *y; 2340 2341 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2342 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2343 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2344 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2345 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2346 if (i<n_vertices) { 2347 y[n_B*i+idx_V_B[i]] = 1.0; 2348 } 2349 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2350 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2351 2352 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2353 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2354 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2355 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2356 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2357 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2358 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2359 } 2360 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2361 } 2362 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2363 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2364 } 2365 /* free memory */ 2366 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2367 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2368 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2369 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2370 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2371 ierr = PetscFree(work);CHKERRQ(ierr); 2372 if (n_vertices) { 2373 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2374 } 2375 if (n_constraints) { 2376 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2377 } 2378 /* Checking coarse_sub_mat and coarse basis functios */ 2379 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2380 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2381 if (pcbddc->dbg_flag) { 2382 Mat coarse_sub_mat; 2383 Mat AUXMAT,TM1,TM2,TM3,TM4; 2384 Mat coarse_phi_D,coarse_phi_B; 2385 Mat coarse_psi_D,coarse_psi_B; 2386 Mat A_II,A_BB,A_IB,A_BI; 2387 Mat C_B,CPHI; 2388 IS is_dummy; 2389 Vec mones; 2390 MatType checkmattype=MATSEQAIJ; 2391 PetscReal real_value; 2392 2393 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2394 Mat A; 2395 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2396 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2397 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2398 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2399 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2400 ierr = MatDestroy(&A);CHKERRQ(ierr); 2401 } else { 2402 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2403 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2404 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2405 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2406 } 2407 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2408 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2409 if (!pcbddc->symmetric_primal) { 2410 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2411 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2412 } 2413 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2414 2415 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2418 if (!pcbddc->symmetric_primal) { 2419 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2420 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2421 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2422 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2423 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2424 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2425 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2426 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2427 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2428 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2429 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2430 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2431 } else { 2432 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2433 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2434 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2435 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2436 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2437 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2438 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2439 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2440 } 2441 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2442 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2443 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2444 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2445 if (pcbddc->benign_n) { 2446 Mat B0_B,B0_BPHI; 2447 PetscScalar *data,*data2; 2448 PetscInt j; 2449 2450 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2451 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2452 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2453 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2454 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2455 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2456 for (j=0;j<pcbddc->benign_n;j++) { 2457 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2458 for (i=0;i<pcbddc->local_primal_size;i++) { 2459 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2460 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2461 } 2462 } 2463 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2464 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2465 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2466 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2467 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2468 } 2469 #if 0 2470 { 2471 PetscViewer viewer; 2472 char filename[256]; 2473 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2474 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2475 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2476 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2477 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2478 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2479 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2480 if (save_change) { 2481 Mat phi_B; 2482 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2483 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2484 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2485 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2486 } else { 2487 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2488 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2489 } 2490 if (pcbddc->coarse_phi_D) { 2491 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2492 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2493 } 2494 if (pcbddc->coarse_psi_B) { 2495 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2496 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2497 } 2498 if (pcbddc->coarse_psi_D) { 2499 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2500 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2501 } 2502 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2503 } 2504 #endif 2505 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2506 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2507 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2509 2510 /* check constraints */ 2511 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2512 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2513 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2514 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2515 } else { 2516 PetscScalar *data; 2517 Mat tmat; 2518 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2519 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2520 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2521 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2522 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2523 } 2524 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2525 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2526 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2527 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2528 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2529 if (!pcbddc->symmetric_primal) { 2530 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2531 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2532 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2533 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2535 } 2536 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2537 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2538 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2539 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2541 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2542 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2543 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2544 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2545 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2546 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2547 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2548 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2549 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2550 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2551 if (!pcbddc->symmetric_primal) { 2552 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2553 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2554 } 2555 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2556 } 2557 /* get back data */ 2558 *coarse_submat_vals_n = coarse_submat_vals; 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2564 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2565 { 2566 Mat *work_mat; 2567 IS isrow_s,iscol_s; 2568 PetscBool rsorted,csorted; 2569 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2570 PetscErrorCode ierr; 2571 2572 PetscFunctionBegin; 2573 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2574 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2575 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2576 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2577 2578 if (!rsorted) { 2579 const PetscInt *idxs; 2580 PetscInt *idxs_sorted,i; 2581 2582 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2583 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2584 for (i=0;i<rsize;i++) { 2585 idxs_perm_r[i] = i; 2586 } 2587 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2588 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2589 for (i=0;i<rsize;i++) { 2590 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2591 } 2592 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2593 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2594 } else { 2595 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2596 isrow_s = isrow; 2597 } 2598 2599 if (!csorted) { 2600 if (isrow == iscol) { 2601 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2602 iscol_s = isrow_s; 2603 } else { 2604 const PetscInt *idxs; 2605 PetscInt *idxs_sorted,i; 2606 2607 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2608 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2609 for (i=0;i<csize;i++) { 2610 idxs_perm_c[i] = i; 2611 } 2612 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2613 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2614 for (i=0;i<csize;i++) { 2615 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2616 } 2617 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2618 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2619 } 2620 } else { 2621 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2622 iscol_s = iscol; 2623 } 2624 2625 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2626 2627 if (!rsorted || !csorted) { 2628 Mat new_mat; 2629 IS is_perm_r,is_perm_c; 2630 2631 if (!rsorted) { 2632 PetscInt *idxs_r,i; 2633 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2634 for (i=0;i<rsize;i++) { 2635 idxs_r[idxs_perm_r[i]] = i; 2636 } 2637 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2638 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2639 } else { 2640 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2641 } 2642 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2643 2644 if (!csorted) { 2645 if (isrow_s == iscol_s) { 2646 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2647 is_perm_c = is_perm_r; 2648 } else { 2649 PetscInt *idxs_c,i; 2650 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2651 for (i=0;i<csize;i++) { 2652 idxs_c[idxs_perm_c[i]] = i; 2653 } 2654 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2655 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2656 } 2657 } else { 2658 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2659 } 2660 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2661 2662 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2663 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2664 work_mat[0] = new_mat; 2665 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2666 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2667 } 2668 2669 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2670 *B = work_mat[0]; 2671 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2672 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2673 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2679 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2680 { 2681 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2682 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2683 Mat new_mat; 2684 IS is_local,is_global; 2685 PetscInt local_size; 2686 PetscBool isseqaij; 2687 PetscErrorCode ierr; 2688 2689 PetscFunctionBegin; 2690 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2691 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2692 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2693 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2694 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2695 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2696 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2697 2698 /* check */ 2699 if (pcbddc->dbg_flag) { 2700 Vec x,x_change; 2701 PetscReal error; 2702 2703 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2704 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2705 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2706 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2707 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2708 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2709 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2710 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2711 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2712 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2715 ierr = VecDestroy(&x);CHKERRQ(ierr); 2716 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2717 } 2718 2719 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2720 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2721 if (isseqaij) { 2722 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2723 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2724 } else { 2725 Mat work_mat; 2726 2727 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2728 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2729 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2730 } 2731 if (matis->A->symmetric_set) { 2732 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2733 #if !defined(PETSC_USE_COMPLEX) 2734 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2735 #endif 2736 } 2737 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2743 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2744 { 2745 PC_IS* pcis = (PC_IS*)(pc->data); 2746 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2747 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2748 PetscInt *idx_R_local=NULL; 2749 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2750 PetscInt vbs,bs; 2751 PetscBT bitmask=NULL; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 /* 2756 No need to setup local scatters if 2757 - primal space is unchanged 2758 AND 2759 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2760 AND 2761 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2762 */ 2763 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2764 PetscFunctionReturn(0); 2765 } 2766 /* destroy old objects */ 2767 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2768 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2769 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2770 /* Set Non-overlapping dimensions */ 2771 n_B = pcis->n_B; 2772 n_D = pcis->n - n_B; 2773 n_vertices = pcbddc->n_vertices; 2774 2775 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2776 2777 /* create auxiliary bitmask and allocate workspace */ 2778 if (!sub_schurs->reuse_solver) { 2779 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2780 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2781 for (i=0;i<n_vertices;i++) { 2782 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2783 } 2784 2785 for (i=0, n_R=0; i<pcis->n; i++) { 2786 if (!PetscBTLookup(bitmask,i)) { 2787 idx_R_local[n_R++] = i; 2788 } 2789 } 2790 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2791 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2792 2793 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2794 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2795 } 2796 2797 /* Block code */ 2798 vbs = 1; 2799 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2800 if (bs>1 && !(n_vertices%bs)) { 2801 PetscBool is_blocked = PETSC_TRUE; 2802 PetscInt *vary; 2803 if (!sub_schurs->reuse_solver) { 2804 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2805 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2806 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2807 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2808 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2809 for (i=0; i<pcis->n/bs; i++) { 2810 if (vary[i]!=0 && vary[i]!=bs) { 2811 is_blocked = PETSC_FALSE; 2812 break; 2813 } 2814 } 2815 ierr = PetscFree(vary);CHKERRQ(ierr); 2816 } else { 2817 /* Verify directly the R set */ 2818 for (i=0; i<n_R/bs; i++) { 2819 PetscInt j,node=idx_R_local[bs*i]; 2820 for (j=1; j<bs; j++) { 2821 if (node != idx_R_local[bs*i+j]-j) { 2822 is_blocked = PETSC_FALSE; 2823 break; 2824 } 2825 } 2826 } 2827 } 2828 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2829 vbs = bs; 2830 for (i=0;i<n_R/vbs;i++) { 2831 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2832 } 2833 } 2834 } 2835 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2836 if (sub_schurs->reuse_solver) { 2837 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2838 2839 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2840 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2841 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2842 reuse_solver->is_R = pcbddc->is_R_local; 2843 } else { 2844 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2845 } 2846 2847 /* print some info if requested */ 2848 if (pcbddc->dbg_flag) { 2849 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2851 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2853 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2855 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2856 } 2857 2858 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2859 if (!sub_schurs->reuse_solver) { 2860 IS is_aux1,is_aux2; 2861 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2862 2863 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2864 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2865 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2866 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2867 for (i=0; i<n_D; i++) { 2868 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2869 } 2870 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2871 for (i=0, j=0; i<n_R; i++) { 2872 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2873 aux_array1[j++] = i; 2874 } 2875 } 2876 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2877 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2878 for (i=0, j=0; i<n_B; i++) { 2879 if (!PetscBTLookup(bitmask,is_indices[i])) { 2880 aux_array2[j++] = i; 2881 } 2882 } 2883 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2884 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2885 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2886 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2887 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2888 2889 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2890 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2891 for (i=0, j=0; i<n_R; i++) { 2892 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2893 aux_array1[j++] = i; 2894 } 2895 } 2896 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2897 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2898 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2899 } 2900 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2901 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2902 } else { 2903 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2904 IS tis; 2905 PetscInt schur_size; 2906 2907 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2908 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2909 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2910 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2912 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2913 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2914 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2915 } 2916 } 2917 PetscFunctionReturn(0); 2918 } 2919 2920 2921 #undef __FUNCT__ 2922 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2923 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2924 { 2925 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2926 PC_IS *pcis = (PC_IS*)pc->data; 2927 PC pc_temp; 2928 Mat A_RR; 2929 MatReuse reuse; 2930 PetscScalar m_one = -1.0; 2931 PetscReal value; 2932 PetscInt n_D,n_R; 2933 PetscBool use_exact,use_exact_reduced,issbaij; 2934 PetscErrorCode ierr; 2935 /* prefixes stuff */ 2936 char dir_prefix[256],neu_prefix[256],str_level[16]; 2937 size_t len; 2938 2939 PetscFunctionBegin; 2940 2941 /* compute prefixes */ 2942 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2943 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2944 if (!pcbddc->current_level) { 2945 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2946 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2947 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2948 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2949 } else { 2950 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2951 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2952 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2953 len -= 15; /* remove "pc_bddc_coarse_" */ 2954 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2955 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2956 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2957 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2958 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2959 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2960 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2961 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2962 } 2963 2964 /* DIRICHLET PROBLEM */ 2965 if (dirichlet) { 2966 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2967 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2968 if (!sub_schurs->reuse_solver) { 2969 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2970 } 2971 if (pcbddc->dbg_flag) { 2972 Mat A_IIn; 2973 2974 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2975 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2976 pcis->A_II = A_IIn; 2977 } 2978 } 2979 if (pcbddc->local_mat->symmetric_set) { 2980 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2981 } 2982 /* Matrix for Dirichlet problem is pcis->A_II */ 2983 n_D = pcis->n - pcis->n_B; 2984 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2985 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2986 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2987 /* default */ 2988 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2989 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2990 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2991 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2992 if (issbaij) { 2993 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2994 } else { 2995 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2996 } 2997 /* Allow user's customization */ 2998 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2999 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3000 } 3001 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3002 if (sub_schurs->reuse_solver) { 3003 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3004 3005 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3006 } 3007 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3008 if (!n_D) { 3009 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3010 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3011 } 3012 /* Set Up KSP for Dirichlet problem of BDDC */ 3013 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3014 /* set ksp_D into pcis data */ 3015 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3016 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3017 pcis->ksp_D = pcbddc->ksp_D; 3018 } 3019 3020 /* NEUMANN PROBLEM */ 3021 A_RR = 0; 3022 if (neumann) { 3023 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3024 PetscInt ibs,mbs; 3025 PetscBool issbaij; 3026 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3027 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3028 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3029 if (pcbddc->ksp_R) { /* already created ksp */ 3030 PetscInt nn_R; 3031 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3032 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3033 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3034 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3035 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3036 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3037 reuse = MAT_INITIAL_MATRIX; 3038 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3039 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3040 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3041 reuse = MAT_INITIAL_MATRIX; 3042 } else { /* safe to reuse the matrix */ 3043 reuse = MAT_REUSE_MATRIX; 3044 } 3045 } 3046 /* last check */ 3047 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3048 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3049 reuse = MAT_INITIAL_MATRIX; 3050 } 3051 } else { /* first time, so we need to create the matrix */ 3052 reuse = MAT_INITIAL_MATRIX; 3053 } 3054 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3055 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3056 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3057 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3058 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3059 if (matis->A == pcbddc->local_mat) { 3060 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3061 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3062 } else { 3063 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3064 } 3065 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3066 if (matis->A == pcbddc->local_mat) { 3067 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3068 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3069 } else { 3070 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3071 } 3072 } 3073 /* extract A_RR */ 3074 if (sub_schurs->reuse_solver) { 3075 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3076 3077 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3078 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3079 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3080 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3081 } else { 3082 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3083 } 3084 } else { 3085 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3086 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3087 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3088 } 3089 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3090 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3091 } 3092 if (pcbddc->local_mat->symmetric_set) { 3093 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3094 } 3095 if (!pcbddc->ksp_R) { /* create object if not present */ 3096 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3097 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3098 /* default */ 3099 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3100 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3101 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3102 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3103 if (issbaij) { 3104 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3105 } else { 3106 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3107 } 3108 /* Allow user's customization */ 3109 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3110 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3111 } 3112 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3113 if (!n_R) { 3114 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3115 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3116 } 3117 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3118 /* Reuse solver if it is present */ 3119 if (sub_schurs->reuse_solver) { 3120 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3121 3122 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3123 } 3124 /* Set Up KSP for Neumann problem of BDDC */ 3125 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3126 } 3127 3128 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 3129 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 3130 if (pcbddc->dbg_flag) { 3131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3132 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3134 } 3135 if (dirichlet) { /* Dirichlet */ 3136 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3137 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3138 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3139 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3140 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3141 /* need to be adapted? */ 3142 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3143 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3144 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 3145 /* print info */ 3146 if (pcbddc->dbg_flag) { 3147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3149 } 3150 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3151 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3152 } 3153 } 3154 if (neumann) { /* Neumann */ 3155 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3156 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3157 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3158 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3159 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3160 /* need to be adapted? */ 3161 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3162 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3163 /* print info */ 3164 if (pcbddc->dbg_flag) { 3165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3167 } 3168 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3169 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3170 } 3171 } 3172 } 3173 /* free Neumann problem's matrix */ 3174 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3175 PetscFunctionReturn(0); 3176 } 3177 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3180 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3181 { 3182 PetscErrorCode ierr; 3183 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3184 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3185 3186 PetscFunctionBegin; 3187 if (!sub_schurs->reuse_solver) { 3188 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3189 } 3190 if (!pcbddc->switch_static) { 3191 if (applytranspose && pcbddc->local_auxmat1) { 3192 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3193 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3194 } 3195 if (!sub_schurs->reuse_solver) { 3196 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3197 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3198 } else { 3199 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3200 3201 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3202 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3203 } 3204 } else { 3205 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3206 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3207 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3208 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3209 if (applytranspose && pcbddc->local_auxmat1) { 3210 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3211 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3212 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3213 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3214 } 3215 } 3216 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3217 if (applytranspose) { 3218 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3219 } else { 3220 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3221 } 3222 } else { 3223 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3224 3225 if (applytranspose) { 3226 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3227 } else { 3228 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3229 } 3230 } 3231 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3232 if (!pcbddc->switch_static) { 3233 if (!sub_schurs->reuse_solver) { 3234 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 } else { 3237 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3238 3239 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3240 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3241 } 3242 if (!applytranspose && pcbddc->local_auxmat1) { 3243 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3244 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3245 } 3246 } else { 3247 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3248 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3249 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3251 if (!applytranspose && pcbddc->local_auxmat1) { 3252 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3253 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3254 } 3255 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3256 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3257 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3258 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 } 3260 PetscFunctionReturn(0); 3261 } 3262 3263 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3266 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3267 { 3268 PetscErrorCode ierr; 3269 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3270 PC_IS* pcis = (PC_IS*) (pc->data); 3271 const PetscScalar zero = 0.0; 3272 3273 PetscFunctionBegin; 3274 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3275 if (!pcbddc->benign_apply_coarse_only) { 3276 if (applytranspose) { 3277 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3278 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3279 } else { 3280 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3281 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3282 } 3283 } else { 3284 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3285 } 3286 3287 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3288 if (pcbddc->benign_n) { 3289 PetscScalar *array; 3290 PetscInt j; 3291 3292 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3293 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3294 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3295 } 3296 3297 /* start communications from local primal nodes to rhs of coarse solver */ 3298 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3299 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3300 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3301 3302 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3303 /* TODO remove null space when doing multilevel */ 3304 if (pcbddc->coarse_ksp) { 3305 Mat coarse_mat; 3306 Vec rhs,sol; 3307 MatNullSpace nullsp; 3308 PetscBool isbddc = PETSC_FALSE; 3309 3310 if (pcbddc->benign_have_null) { 3311 PC coarse_pc; 3312 3313 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3314 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3315 /* we need to propagate to coarser levels the need for a possible benign correction */ 3316 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3317 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3318 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3319 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3320 } 3321 } 3322 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3323 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3324 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3325 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3326 if (nullsp) { 3327 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3328 } 3329 if (applytranspose) { 3330 if (pcbddc->benign_apply_coarse_only) { 3331 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3332 } else { 3333 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3334 } 3335 } else { 3336 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3337 PC coarse_pc; 3338 3339 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3340 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3341 ierr = PCApply(coarse_pc,rhs,sol);CHKERRQ(ierr); 3342 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3343 } else { 3344 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3345 } 3346 } 3347 /* we don't the benign correction at coarser levels anymore */ 3348 if (pcbddc->benign_have_null && isbddc) { 3349 PC coarse_pc; 3350 PC_BDDC* coarsepcbddc; 3351 3352 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3353 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3354 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3355 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3356 } 3357 if (nullsp) { 3358 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3359 } 3360 } 3361 3362 /* Local solution on R nodes */ 3363 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3364 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3365 } 3366 /* communications from coarse sol to local primal nodes */ 3367 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3368 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3369 3370 /* Sum contributions from the two levels */ 3371 if (!pcbddc->benign_apply_coarse_only) { 3372 if (applytranspose) { 3373 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3374 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3375 } else { 3376 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3377 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3378 } 3379 /* store p0 */ 3380 if (pcbddc->benign_n) { 3381 PetscScalar *array; 3382 PetscInt j; 3383 3384 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3385 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3386 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3387 } 3388 } else { /* expand the coarse solution */ 3389 if (applytranspose) { 3390 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3391 } else { 3392 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3393 } 3394 } 3395 PetscFunctionReturn(0); 3396 } 3397 3398 #undef __FUNCT__ 3399 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3400 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3401 { 3402 PetscErrorCode ierr; 3403 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3404 PetscScalar *array; 3405 Vec from,to; 3406 3407 PetscFunctionBegin; 3408 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3409 from = pcbddc->coarse_vec; 3410 to = pcbddc->vec1_P; 3411 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3412 Vec tvec; 3413 3414 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3415 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3416 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3417 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3418 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3419 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3420 } 3421 } else { /* from local to global -> put data in coarse right hand side */ 3422 from = pcbddc->vec1_P; 3423 to = pcbddc->coarse_vec; 3424 } 3425 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 3429 #undef __FUNCT__ 3430 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3431 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3432 { 3433 PetscErrorCode ierr; 3434 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3435 PetscScalar *array; 3436 Vec from,to; 3437 3438 PetscFunctionBegin; 3439 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3440 from = pcbddc->coarse_vec; 3441 to = pcbddc->vec1_P; 3442 } else { /* from local to global -> put data in coarse right hand side */ 3443 from = pcbddc->vec1_P; 3444 to = pcbddc->coarse_vec; 3445 } 3446 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3447 if (smode == SCATTER_FORWARD) { 3448 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3449 Vec tvec; 3450 3451 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3452 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3453 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3454 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3455 } 3456 } else { 3457 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3458 ierr = VecResetArray(from);CHKERRQ(ierr); 3459 } 3460 } 3461 PetscFunctionReturn(0); 3462 } 3463 3464 /* uncomment for testing purposes */ 3465 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3468 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3469 { 3470 PetscErrorCode ierr; 3471 PC_IS* pcis = (PC_IS*)(pc->data); 3472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3473 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3474 /* one and zero */ 3475 PetscScalar one=1.0,zero=0.0; 3476 /* space to store constraints and their local indices */ 3477 PetscScalar *constraints_data; 3478 PetscInt *constraints_idxs,*constraints_idxs_B; 3479 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3480 PetscInt *constraints_n; 3481 /* iterators */ 3482 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3483 /* BLAS integers */ 3484 PetscBLASInt lwork,lierr; 3485 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3486 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3487 /* reuse */ 3488 PetscInt olocal_primal_size,olocal_primal_size_cc; 3489 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3490 /* change of basis */ 3491 PetscBool qr_needed; 3492 PetscBT change_basis,qr_needed_idx; 3493 /* auxiliary stuff */ 3494 PetscInt *nnz,*is_indices; 3495 PetscInt ncc; 3496 /* some quantities */ 3497 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3498 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3499 3500 PetscFunctionBegin; 3501 /* Destroy Mat objects computed previously */ 3502 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3503 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3504 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3505 /* save info on constraints from previous setup (if any) */ 3506 olocal_primal_size = pcbddc->local_primal_size; 3507 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3508 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3509 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3510 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3511 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3512 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3513 3514 if (!pcbddc->adaptive_selection) { 3515 IS ISForVertices,*ISForFaces,*ISForEdges; 3516 MatNullSpace nearnullsp; 3517 const Vec *nearnullvecs; 3518 Vec *localnearnullsp; 3519 PetscScalar *array; 3520 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3521 PetscBool nnsp_has_cnst; 3522 /* LAPACK working arrays for SVD or POD */ 3523 PetscBool skip_lapack,boolforchange; 3524 PetscScalar *work; 3525 PetscReal *singular_vals; 3526 #if defined(PETSC_USE_COMPLEX) 3527 PetscReal *rwork; 3528 #endif 3529 #if defined(PETSC_MISSING_LAPACK_GESVD) 3530 PetscScalar *temp_basis,*correlation_mat; 3531 #else 3532 PetscBLASInt dummy_int=1; 3533 PetscScalar dummy_scalar=1.; 3534 #endif 3535 3536 /* Get index sets for faces, edges and vertices from graph */ 3537 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3538 /* print some info */ 3539 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3540 PetscInt nv; 3541 3542 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3543 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3544 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3549 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3550 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3551 } 3552 3553 /* free unneeded index sets */ 3554 if (!pcbddc->use_vertices) { 3555 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3556 } 3557 if (!pcbddc->use_edges) { 3558 for (i=0;i<n_ISForEdges;i++) { 3559 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3560 } 3561 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3562 n_ISForEdges = 0; 3563 } 3564 if (!pcbddc->use_faces) { 3565 for (i=0;i<n_ISForFaces;i++) { 3566 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3567 } 3568 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3569 n_ISForFaces = 0; 3570 } 3571 3572 #if defined(PETSC_USE_DEBUG) 3573 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3574 Also use_change_of_basis should be consistent among processors */ 3575 if (pcbddc->NullSpace) { 3576 PetscBool tbool[2],gbool[2]; 3577 3578 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3579 pcbddc->use_change_of_basis = PETSC_TRUE; 3580 if (!ISForEdges) { 3581 pcbddc->use_change_on_faces = PETSC_TRUE; 3582 } 3583 } 3584 tbool[0] = pcbddc->use_change_of_basis; 3585 tbool[1] = pcbddc->use_change_on_faces; 3586 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3587 pcbddc->use_change_of_basis = gbool[0]; 3588 pcbddc->use_change_on_faces = gbool[1]; 3589 } 3590 #endif 3591 3592 /* check if near null space is attached to global mat */ 3593 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3594 if (nearnullsp) { 3595 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3596 /* remove any stored info */ 3597 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3598 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3599 /* store information for BDDC solver reuse */ 3600 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3601 pcbddc->onearnullspace = nearnullsp; 3602 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3603 for (i=0;i<nnsp_size;i++) { 3604 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3605 } 3606 } else { /* if near null space is not provided BDDC uses constants by default */ 3607 nnsp_size = 0; 3608 nnsp_has_cnst = PETSC_TRUE; 3609 } 3610 /* get max number of constraints on a single cc */ 3611 max_constraints = nnsp_size; 3612 if (nnsp_has_cnst) max_constraints++; 3613 3614 /* 3615 Evaluate maximum storage size needed by the procedure 3616 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3617 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3618 There can be multiple constraints per connected component 3619 */ 3620 n_vertices = 0; 3621 if (ISForVertices) { 3622 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3623 } 3624 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3625 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3626 3627 total_counts = n_ISForFaces+n_ISForEdges; 3628 total_counts *= max_constraints; 3629 total_counts += n_vertices; 3630 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3631 3632 total_counts = 0; 3633 max_size_of_constraint = 0; 3634 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3635 IS used_is; 3636 if (i<n_ISForEdges) { 3637 used_is = ISForEdges[i]; 3638 } else { 3639 used_is = ISForFaces[i-n_ISForEdges]; 3640 } 3641 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3642 total_counts += j; 3643 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3644 } 3645 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3646 3647 /* get local part of global near null space vectors */ 3648 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3649 for (k=0;k<nnsp_size;k++) { 3650 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3651 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3652 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3653 } 3654 3655 /* whether or not to skip lapack calls */ 3656 skip_lapack = PETSC_TRUE; 3657 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3658 3659 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3660 if (!skip_lapack) { 3661 PetscScalar temp_work; 3662 3663 #if defined(PETSC_MISSING_LAPACK_GESVD) 3664 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3665 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3668 #if defined(PETSC_USE_COMPLEX) 3669 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3670 #endif 3671 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3672 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3673 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3674 lwork = -1; 3675 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3676 #if !defined(PETSC_USE_COMPLEX) 3677 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3678 #else 3679 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3680 #endif 3681 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3683 #else /* on missing GESVD */ 3684 /* SVD */ 3685 PetscInt max_n,min_n; 3686 max_n = max_size_of_constraint; 3687 min_n = max_constraints; 3688 if (max_size_of_constraint < max_constraints) { 3689 min_n = max_size_of_constraint; 3690 max_n = max_constraints; 3691 } 3692 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3693 #if defined(PETSC_USE_COMPLEX) 3694 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3695 #endif 3696 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3697 lwork = -1; 3698 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3699 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3700 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3701 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3702 #if !defined(PETSC_USE_COMPLEX) 3703 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3704 #else 3705 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3706 #endif 3707 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3708 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3709 #endif /* on missing GESVD */ 3710 /* Allocate optimal workspace */ 3711 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3712 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3713 } 3714 /* Now we can loop on constraining sets */ 3715 total_counts = 0; 3716 constraints_idxs_ptr[0] = 0; 3717 constraints_data_ptr[0] = 0; 3718 /* vertices */ 3719 if (n_vertices) { 3720 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3721 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3722 for (i=0;i<n_vertices;i++) { 3723 constraints_n[total_counts] = 1; 3724 constraints_data[total_counts] = 1.0; 3725 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3726 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3727 total_counts++; 3728 } 3729 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3730 n_vertices = total_counts; 3731 } 3732 3733 /* edges and faces */ 3734 total_counts_cc = total_counts; 3735 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3736 IS used_is; 3737 PetscBool idxs_copied = PETSC_FALSE; 3738 3739 if (ncc<n_ISForEdges) { 3740 used_is = ISForEdges[ncc]; 3741 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3742 } else { 3743 used_is = ISForFaces[ncc-n_ISForEdges]; 3744 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3745 } 3746 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3747 3748 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3749 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3750 /* change of basis should not be performed on local periodic nodes */ 3751 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3752 if (nnsp_has_cnst) { 3753 PetscScalar quad_value; 3754 3755 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3756 idxs_copied = PETSC_TRUE; 3757 3758 if (!pcbddc->use_nnsp_true) { 3759 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3760 } else { 3761 quad_value = 1.0; 3762 } 3763 for (j=0;j<size_of_constraint;j++) { 3764 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3765 } 3766 temp_constraints++; 3767 total_counts++; 3768 } 3769 for (k=0;k<nnsp_size;k++) { 3770 PetscReal real_value; 3771 PetscScalar *ptr_to_data; 3772 3773 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3774 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3775 for (j=0;j<size_of_constraint;j++) { 3776 ptr_to_data[j] = array[is_indices[j]]; 3777 } 3778 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3779 /* check if array is null on the connected component */ 3780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3781 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3782 if (real_value > 0.0) { /* keep indices and values */ 3783 temp_constraints++; 3784 total_counts++; 3785 if (!idxs_copied) { 3786 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3787 idxs_copied = PETSC_TRUE; 3788 } 3789 } 3790 } 3791 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3792 valid_constraints = temp_constraints; 3793 if (!pcbddc->use_nnsp_true && temp_constraints) { 3794 if (temp_constraints == 1) { /* just normalize the constraint */ 3795 PetscScalar norm,*ptr_to_data; 3796 3797 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3799 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3800 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3801 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3802 } else { /* perform SVD */ 3803 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3804 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3805 3806 #if defined(PETSC_MISSING_LAPACK_GESVD) 3807 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3808 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3809 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3810 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3811 from that computed using LAPACKgesvd 3812 -> This is due to a different computation of eigenvectors in LAPACKheev 3813 -> The quality of the POD-computed basis will be the same */ 3814 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3815 /* Store upper triangular part of correlation matrix */ 3816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3818 for (j=0;j<temp_constraints;j++) { 3819 for (k=0;k<j+1;k++) { 3820 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3821 } 3822 } 3823 /* compute eigenvalues and eigenvectors of correlation matrix */ 3824 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3825 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3826 #if !defined(PETSC_USE_COMPLEX) 3827 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3828 #else 3829 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3830 #endif 3831 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3832 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3833 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3834 j = 0; 3835 while (j < temp_constraints && singular_vals[j] < tol) j++; 3836 total_counts = total_counts-j; 3837 valid_constraints = temp_constraints-j; 3838 /* scale and copy POD basis into used quadrature memory */ 3839 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3840 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3841 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3842 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3843 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3845 if (j<temp_constraints) { 3846 PetscInt ii; 3847 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3848 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3849 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3850 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3851 for (k=0;k<temp_constraints-j;k++) { 3852 for (ii=0;ii<size_of_constraint;ii++) { 3853 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3854 } 3855 } 3856 } 3857 #else /* on missing GESVD */ 3858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3859 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3861 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3862 #if !defined(PETSC_USE_COMPLEX) 3863 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3864 #else 3865 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3866 #endif 3867 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3868 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3869 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3870 k = temp_constraints; 3871 if (k > size_of_constraint) k = size_of_constraint; 3872 j = 0; 3873 while (j < k && singular_vals[k-j-1] < tol) j++; 3874 valid_constraints = k-j; 3875 total_counts = total_counts-temp_constraints+valid_constraints; 3876 #endif /* on missing GESVD */ 3877 } 3878 } 3879 /* update pointers information */ 3880 if (valid_constraints) { 3881 constraints_n[total_counts_cc] = valid_constraints; 3882 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3883 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3884 /* set change_of_basis flag */ 3885 if (boolforchange) { 3886 PetscBTSet(change_basis,total_counts_cc); 3887 } 3888 total_counts_cc++; 3889 } 3890 } 3891 /* free workspace */ 3892 if (!skip_lapack) { 3893 ierr = PetscFree(work);CHKERRQ(ierr); 3894 #if defined(PETSC_USE_COMPLEX) 3895 ierr = PetscFree(rwork);CHKERRQ(ierr); 3896 #endif 3897 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3898 #if defined(PETSC_MISSING_LAPACK_GESVD) 3899 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3900 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3901 #endif 3902 } 3903 for (k=0;k<nnsp_size;k++) { 3904 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3905 } 3906 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3907 /* free index sets of faces, edges and vertices */ 3908 for (i=0;i<n_ISForFaces;i++) { 3909 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3910 } 3911 if (n_ISForFaces) { 3912 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3913 } 3914 for (i=0;i<n_ISForEdges;i++) { 3915 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3916 } 3917 if (n_ISForEdges) { 3918 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3919 } 3920 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3921 } else { 3922 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3923 3924 total_counts = 0; 3925 n_vertices = 0; 3926 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3927 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3928 } 3929 max_constraints = 0; 3930 total_counts_cc = 0; 3931 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3932 total_counts += pcbddc->adaptive_constraints_n[i]; 3933 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3934 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3935 } 3936 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3937 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3938 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3939 constraints_data = pcbddc->adaptive_constraints_data; 3940 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3941 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3942 total_counts_cc = 0; 3943 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3944 if (pcbddc->adaptive_constraints_n[i]) { 3945 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3946 } 3947 } 3948 #if 0 3949 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3950 for (i=0;i<total_counts_cc;i++) { 3951 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3952 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3953 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3954 printf(" %d",constraints_idxs[j]); 3955 } 3956 printf("\n"); 3957 printf("number of cc: %d\n",constraints_n[i]); 3958 } 3959 for (i=0;i<n_vertices;i++) { 3960 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3961 } 3962 for (i=0;i<sub_schurs->n_subs;i++) { 3963 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3964 } 3965 #endif 3966 3967 max_size_of_constraint = 0; 3968 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3969 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3970 /* Change of basis */ 3971 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3972 if (pcbddc->use_change_of_basis) { 3973 for (i=0;i<sub_schurs->n_subs;i++) { 3974 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3975 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3976 } 3977 } 3978 } 3979 } 3980 pcbddc->local_primal_size = total_counts; 3981 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3982 3983 /* map constraints_idxs in boundary numbering */ 3984 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3985 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3986 3987 /* Create constraint matrix */ 3988 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3989 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3990 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3991 3992 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3993 /* determine if a QR strategy is needed for change of basis */ 3994 qr_needed = PETSC_FALSE; 3995 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3996 total_primal_vertices=0; 3997 pcbddc->local_primal_size_cc = 0; 3998 for (i=0;i<total_counts_cc;i++) { 3999 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4000 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4001 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4002 pcbddc->local_primal_size_cc += 1; 4003 } else if (PetscBTLookup(change_basis,i)) { 4004 for (k=0;k<constraints_n[i];k++) { 4005 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4006 } 4007 pcbddc->local_primal_size_cc += constraints_n[i]; 4008 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4009 PetscBTSet(qr_needed_idx,i); 4010 qr_needed = PETSC_TRUE; 4011 } 4012 } else { 4013 pcbddc->local_primal_size_cc += 1; 4014 } 4015 } 4016 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4017 pcbddc->n_vertices = total_primal_vertices; 4018 /* permute indices in order to have a sorted set of vertices */ 4019 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4020 4021 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4022 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4023 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4024 4025 /* nonzero structure of constraint matrix */ 4026 /* and get reference dof for local constraints */ 4027 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4028 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4029 4030 j = total_primal_vertices; 4031 total_counts = total_primal_vertices; 4032 cum = total_primal_vertices; 4033 for (i=n_vertices;i<total_counts_cc;i++) { 4034 if (!PetscBTLookup(change_basis,i)) { 4035 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4036 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4037 cum++; 4038 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4039 for (k=0;k<constraints_n[i];k++) { 4040 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4041 nnz[j+k] = size_of_constraint; 4042 } 4043 j += constraints_n[i]; 4044 } 4045 } 4046 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4047 ierr = PetscFree(nnz);CHKERRQ(ierr); 4048 4049 /* set values in constraint matrix */ 4050 for (i=0;i<total_primal_vertices;i++) { 4051 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4052 } 4053 total_counts = total_primal_vertices; 4054 for (i=n_vertices;i<total_counts_cc;i++) { 4055 if (!PetscBTLookup(change_basis,i)) { 4056 PetscInt *cols; 4057 4058 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4059 cols = constraints_idxs+constraints_idxs_ptr[i]; 4060 for (k=0;k<constraints_n[i];k++) { 4061 PetscInt row = total_counts+k; 4062 PetscScalar *vals; 4063 4064 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4065 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4066 } 4067 total_counts += constraints_n[i]; 4068 } 4069 } 4070 /* assembling */ 4071 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4072 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4073 4074 /* 4075 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4076 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4077 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4078 */ 4079 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4080 if (pcbddc->use_change_of_basis) { 4081 /* dual and primal dofs on a single cc */ 4082 PetscInt dual_dofs,primal_dofs; 4083 /* working stuff for GEQRF */ 4084 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4085 PetscBLASInt lqr_work; 4086 /* working stuff for UNGQR */ 4087 PetscScalar *gqr_work,lgqr_work_t; 4088 PetscBLASInt lgqr_work; 4089 /* working stuff for TRTRS */ 4090 PetscScalar *trs_rhs; 4091 PetscBLASInt Blas_NRHS; 4092 /* pointers for values insertion into change of basis matrix */ 4093 PetscInt *start_rows,*start_cols; 4094 PetscScalar *start_vals; 4095 /* working stuff for values insertion */ 4096 PetscBT is_primal; 4097 PetscInt *aux_primal_numbering_B; 4098 /* matrix sizes */ 4099 PetscInt global_size,local_size; 4100 /* temporary change of basis */ 4101 Mat localChangeOfBasisMatrix; 4102 /* extra space for debugging */ 4103 PetscScalar *dbg_work; 4104 4105 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4106 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4107 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4108 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4109 /* nonzeros for local mat */ 4110 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4111 if (!pcbddc->benign_change || pcbddc->fake_change) { 4112 for (i=0;i<pcis->n;i++) nnz[i]=1; 4113 } else { 4114 const PetscInt *ii; 4115 PetscInt n; 4116 PetscBool flg_row; 4117 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4118 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4119 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4120 } 4121 for (i=n_vertices;i<total_counts_cc;i++) { 4122 if (PetscBTLookup(change_basis,i)) { 4123 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4124 if (PetscBTLookup(qr_needed_idx,i)) { 4125 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4126 } else { 4127 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4128 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4129 } 4130 } 4131 } 4132 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4133 ierr = PetscFree(nnz);CHKERRQ(ierr); 4134 /* Set interior change in the matrix */ 4135 if (!pcbddc->benign_change || pcbddc->fake_change) { 4136 for (i=0;i<pcis->n;i++) { 4137 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4138 } 4139 } else { 4140 const PetscInt *ii,*jj; 4141 PetscScalar *aa; 4142 PetscInt n; 4143 PetscBool flg_row; 4144 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4145 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4146 for (i=0;i<n;i++) { 4147 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4148 } 4149 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4150 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4151 } 4152 4153 if (pcbddc->dbg_flag) { 4154 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4156 } 4157 4158 4159 /* Now we loop on the constraints which need a change of basis */ 4160 /* 4161 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4162 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4163 4164 Basic blocks of change of basis matrix T computed by 4165 4166 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4167 4168 | 1 0 ... 0 s_1/S | 4169 | 0 1 ... 0 s_2/S | 4170 | ... | 4171 | 0 ... 1 s_{n-1}/S | 4172 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4173 4174 with S = \sum_{i=1}^n s_i^2 4175 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4176 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4177 4178 - QR decomposition of constraints otherwise 4179 */ 4180 if (qr_needed) { 4181 /* space to store Q */ 4182 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4183 /* first we issue queries for optimal work */ 4184 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4185 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4186 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4187 lqr_work = -1; 4188 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4189 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4190 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4191 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4192 lgqr_work = -1; 4193 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4194 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4195 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4196 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4197 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4198 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4199 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4200 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4201 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4202 /* array to store scaling factors for reflectors */ 4203 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4204 /* array to store rhs and solution of triangular solver */ 4205 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4206 /* allocating workspace for check */ 4207 if (pcbddc->dbg_flag) { 4208 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4209 } 4210 } 4211 /* array to store whether a node is primal or not */ 4212 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4213 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4214 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4215 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4216 for (i=0;i<total_primal_vertices;i++) { 4217 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4218 } 4219 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4220 4221 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4222 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4223 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4224 if (PetscBTLookup(change_basis,total_counts)) { 4225 /* get constraint info */ 4226 primal_dofs = constraints_n[total_counts]; 4227 dual_dofs = size_of_constraint-primal_dofs; 4228 4229 if (pcbddc->dbg_flag) { 4230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4231 } 4232 4233 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4234 4235 /* copy quadrature constraints for change of basis check */ 4236 if (pcbddc->dbg_flag) { 4237 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4238 } 4239 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4240 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4241 4242 /* compute QR decomposition of constraints */ 4243 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4244 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4245 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4246 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4247 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4248 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4249 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4250 4251 /* explictly compute R^-T */ 4252 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4253 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4254 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4255 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4256 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4257 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4259 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4260 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4262 4263 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4264 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4266 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4267 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4269 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4270 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4271 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4272 4273 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4274 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4275 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4276 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4277 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4278 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4280 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4282 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4283 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4284 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4285 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4286 4287 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4288 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4289 /* insert cols for primal dofs */ 4290 for (j=0;j<primal_dofs;j++) { 4291 start_vals = &qr_basis[j*size_of_constraint]; 4292 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4293 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4294 } 4295 /* insert cols for dual dofs */ 4296 for (j=0,k=0;j<dual_dofs;k++) { 4297 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4298 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4299 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4300 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4301 j++; 4302 } 4303 } 4304 4305 /* check change of basis */ 4306 if (pcbddc->dbg_flag) { 4307 PetscInt ii,jj; 4308 PetscBool valid_qr=PETSC_TRUE; 4309 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4314 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4315 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4316 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4318 for (jj=0;jj<size_of_constraint;jj++) { 4319 for (ii=0;ii<primal_dofs;ii++) { 4320 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4321 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4322 } 4323 } 4324 if (!valid_qr) { 4325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4326 for (jj=0;jj<size_of_constraint;jj++) { 4327 for (ii=0;ii<primal_dofs;ii++) { 4328 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4329 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4330 } 4331 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4332 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4333 } 4334 } 4335 } 4336 } else { 4337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4338 } 4339 } 4340 } else { /* simple transformation block */ 4341 PetscInt row,col; 4342 PetscScalar val,norm; 4343 4344 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4345 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4346 for (j=0;j<size_of_constraint;j++) { 4347 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4348 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4349 if (!PetscBTLookup(is_primal,row_B)) { 4350 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4351 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4352 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4353 } else { 4354 for (k=0;k<size_of_constraint;k++) { 4355 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4356 if (row != col) { 4357 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4358 } else { 4359 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4360 } 4361 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4362 } 4363 } 4364 } 4365 if (pcbddc->dbg_flag) { 4366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4367 } 4368 } 4369 } else { 4370 if (pcbddc->dbg_flag) { 4371 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4372 } 4373 } 4374 } 4375 4376 /* free workspace */ 4377 if (qr_needed) { 4378 if (pcbddc->dbg_flag) { 4379 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4380 } 4381 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4382 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4383 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4384 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4385 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4386 } 4387 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4388 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4390 4391 /* assembling of global change of variable */ 4392 if (!pcbddc->fake_change) { 4393 Mat tmat; 4394 PetscInt bs; 4395 4396 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4397 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4398 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4399 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4400 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4401 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4402 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4403 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4404 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4405 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4406 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4407 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4408 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4409 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4410 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4411 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4412 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4413 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4414 4415 /* check */ 4416 if (pcbddc->dbg_flag) { 4417 PetscReal error; 4418 Vec x,x_change; 4419 4420 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4421 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4422 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4423 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4424 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4425 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4426 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4427 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4428 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4429 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4430 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4431 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4433 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4434 ierr = VecDestroy(&x);CHKERRQ(ierr); 4435 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4436 } 4437 /* adapt sub_schurs computed (if any) */ 4438 if (pcbddc->use_deluxe_scaling) { 4439 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4440 4441 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4442 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4443 } 4444 if (sub_schurs->S_Ej_all) { 4445 Mat S_new,tmat; 4446 IS is_all_N,is_V_Sall = NULL; 4447 4448 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4449 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4450 if (pcbddc->deluxe_zerorows) { 4451 ISLocalToGlobalMapping NtoSall; 4452 IS is_V; 4453 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4454 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4455 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4456 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4457 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4458 } 4459 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4460 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4461 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4462 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4463 if (pcbddc->deluxe_zerorows) { 4464 const PetscScalar *array; 4465 const PetscInt *idxs_V,*idxs_all; 4466 PetscInt i,n_V; 4467 4468 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4469 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4470 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4471 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4472 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4473 for (i=0;i<n_V;i++) { 4474 PetscScalar val; 4475 PetscInt idx; 4476 4477 idx = idxs_V[i]; 4478 val = array[idxs_all[idxs_V[i]]]; 4479 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4480 } 4481 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4482 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4483 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4484 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4485 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4486 } 4487 sub_schurs->S_Ej_all = S_new; 4488 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4489 if (sub_schurs->sum_S_Ej_all) { 4490 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4491 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4492 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4493 if (pcbddc->deluxe_zerorows) { 4494 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4495 } 4496 sub_schurs->sum_S_Ej_all = S_new; 4497 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4498 } 4499 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4500 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4501 } 4502 /* destroy any change of basis context in sub_schurs */ 4503 if (sub_schurs->change) { 4504 PetscInt i; 4505 4506 for (i=0;i<sub_schurs->n_subs;i++) { 4507 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4508 } 4509 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4510 } 4511 } 4512 if (pcbddc->switch_static) { /* need to save the local change */ 4513 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4514 } else { 4515 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4516 } 4517 /* determine if any process has changed the pressures locally */ 4518 pcbddc->change_interior = pcbddc->benign_have_null; 4519 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4520 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4521 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4522 pcbddc->use_qr_single = qr_needed; 4523 } 4524 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4525 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4526 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4527 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4528 } else { 4529 Mat benign_global = NULL; 4530 if (pcbddc->benign_have_null) { 4531 Mat tmat; 4532 4533 pcbddc->change_interior = PETSC_TRUE; 4534 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4535 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4536 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4537 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4538 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4542 if (pcbddc->benign_change) { 4543 Mat M; 4544 4545 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4546 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4547 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4548 ierr = MatDestroy(&M);CHKERRQ(ierr); 4549 } else { 4550 Mat eye; 4551 PetscScalar *array; 4552 4553 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4554 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4555 for (i=0;i<pcis->n;i++) { 4556 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4557 } 4558 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4559 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4560 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4561 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4562 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4563 } 4564 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4565 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4566 } 4567 if (pcbddc->user_ChangeOfBasisMatrix) { 4568 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4569 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4570 } else if (pcbddc->benign_have_null) { 4571 pcbddc->ChangeOfBasisMatrix = benign_global; 4572 } 4573 } 4574 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4575 IS is_global; 4576 const PetscInt *gidxs; 4577 4578 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4580 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4581 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4582 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4583 } 4584 } 4585 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4586 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4587 } 4588 4589 if (!pcbddc->fake_change) { 4590 /* add pressure dofs to set of primal nodes for numbering purposes */ 4591 for (i=0;i<pcbddc->benign_n;i++) { 4592 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4593 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4594 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4595 pcbddc->local_primal_size_cc++; 4596 pcbddc->local_primal_size++; 4597 } 4598 4599 /* check if a new primal space has been introduced (also take into account benign trick) */ 4600 pcbddc->new_primal_space_local = PETSC_TRUE; 4601 if (olocal_primal_size == pcbddc->local_primal_size) { 4602 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4603 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4604 if (!pcbddc->new_primal_space_local) { 4605 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4606 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4607 } 4608 } 4609 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4610 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4611 } 4612 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4613 4614 /* flush dbg viewer */ 4615 if (pcbddc->dbg_flag) { 4616 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4617 } 4618 4619 /* free workspace */ 4620 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4621 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4622 if (!pcbddc->adaptive_selection) { 4623 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4624 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4625 } else { 4626 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4627 pcbddc->adaptive_constraints_idxs_ptr, 4628 pcbddc->adaptive_constraints_data_ptr, 4629 pcbddc->adaptive_constraints_idxs, 4630 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4631 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4632 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4633 } 4634 PetscFunctionReturn(0); 4635 } 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4639 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4640 { 4641 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4642 PC_IS *pcis = (PC_IS*)pc->data; 4643 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4644 PetscInt ierr,i,N; 4645 4646 PetscFunctionBegin; 4647 /* Reset previously computed graph */ 4648 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4649 /* Init local Graph struct */ 4650 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4651 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4652 4653 /* Check validity of the csr graph passed in by the user */ 4654 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4655 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4656 } 4657 4658 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4659 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4660 PetscInt *xadj,*adjncy; 4661 PetscInt nvtxs; 4662 PetscBool flg_row=PETSC_FALSE; 4663 4664 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4665 if (flg_row) { 4666 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4667 pcbddc->computed_rowadj = PETSC_TRUE; 4668 } 4669 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4670 } 4671 if (pcbddc->dbg_flag) { 4672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4673 } 4674 4675 /* Setup of Graph */ 4676 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4677 4678 /* attach info on disconnected subdomains if present */ 4679 if (pcbddc->n_local_subs) { 4680 PetscInt *local_subs; 4681 4682 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4683 for (i=0;i<pcbddc->n_local_subs;i++) { 4684 const PetscInt *idxs; 4685 PetscInt nl,j; 4686 4687 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4688 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4689 for (j=0;j<nl;j++) { 4690 local_subs[idxs[j]] = i; 4691 } 4692 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4693 } 4694 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4695 pcbddc->mat_graph->local_subs = local_subs; 4696 } 4697 4698 /* Graph's connected components analysis */ 4699 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4700 PetscFunctionReturn(0); 4701 } 4702 4703 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4704 #undef __FUNCT__ 4705 #define __FUNCT__ "PCBDDCSubsetNumbering" 4706 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4707 { 4708 PetscSF sf; 4709 PetscLayout map; 4710 const PetscInt *idxs; 4711 PetscInt *leaf_data,*root_data,*gidxs; 4712 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4713 PetscInt n_n,nlocals,start,first_index; 4714 PetscMPIInt commsize; 4715 PetscBool first_found; 4716 PetscErrorCode ierr; 4717 4718 PetscFunctionBegin; 4719 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4720 if (subset_mult) { 4721 PetscCheckSameComm(subset,1,subset_mult,2); 4722 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4723 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4724 } 4725 /* create workspace layout for computing global indices of subset */ 4726 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4727 lbounds[0] = lbounds[1] = 0; 4728 for (i=0;i<n;i++) { 4729 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4730 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4731 } 4732 lbounds[0] = -lbounds[0]; 4733 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4734 gbounds[0] = -gbounds[0]; 4735 N = gbounds[1] - gbounds[0] + 1; 4736 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4737 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4738 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4739 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4740 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4741 4742 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4743 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4744 if (subset_mult) { 4745 const PetscInt* idxs_mult; 4746 4747 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4748 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4749 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4750 } else { 4751 for (i=0;i<n;i++) leaf_data[i] = 1; 4752 } 4753 /* local size of new subset */ 4754 n_n = 0; 4755 for (i=0;i<n;i++) n_n += leaf_data[i]; 4756 4757 /* global indexes in layout */ 4758 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4759 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4760 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4761 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4762 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4763 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4764 4765 /* reduce from leaves to roots */ 4766 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4767 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4768 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4769 4770 /* count indexes in local part of layout */ 4771 nlocals = 0; 4772 first_index = -1; 4773 first_found = PETSC_FALSE; 4774 for (i=0;i<Nl;i++) { 4775 if (!first_found && root_data[i]) { 4776 first_found = PETSC_TRUE; 4777 first_index = i; 4778 } 4779 nlocals += root_data[i]; 4780 } 4781 4782 /* cumulative of number of indexes and size of subset without holes */ 4783 #if defined(PETSC_HAVE_MPI_EXSCAN) 4784 start = 0; 4785 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4786 #else 4787 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4788 start = start-nlocals; 4789 #endif 4790 4791 if (N_n) { /* compute total size of new subset if requested */ 4792 *N_n = start + nlocals; 4793 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4794 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4795 } 4796 4797 /* adapt root data with cumulative */ 4798 if (first_found) { 4799 PetscInt old_index; 4800 4801 root_data[first_index] += start; 4802 old_index = first_index; 4803 for (i=first_index+1;i<Nl;i++) { 4804 if (root_data[i]) { 4805 root_data[i] += root_data[old_index]; 4806 old_index = i; 4807 } 4808 } 4809 } 4810 4811 /* from roots to leaves */ 4812 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4813 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4814 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4815 4816 /* create new IS with global indexes without holes */ 4817 if (subset_mult) { 4818 const PetscInt* idxs_mult; 4819 PetscInt cum; 4820 4821 cum = 0; 4822 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4823 for (i=0;i<n;i++) { 4824 PetscInt j; 4825 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4826 } 4827 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4828 } else { 4829 for (i=0;i<n;i++) { 4830 gidxs[i] = leaf_data[i]-1; 4831 } 4832 } 4833 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4834 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4835 PetscFunctionReturn(0); 4836 } 4837 4838 #undef __FUNCT__ 4839 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4840 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4841 { 4842 PetscInt i,j; 4843 PetscScalar *alphas; 4844 PetscErrorCode ierr; 4845 4846 PetscFunctionBegin; 4847 /* this implements stabilized Gram-Schmidt */ 4848 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4849 for (i=0;i<n;i++) { 4850 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4851 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4852 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4853 } 4854 ierr = PetscFree(alphas);CHKERRQ(ierr); 4855 PetscFunctionReturn(0); 4856 } 4857 4858 #undef __FUNCT__ 4859 #define __FUNCT__ "MatISGetSubassemblingPattern" 4860 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4861 { 4862 Mat A; 4863 PetscInt n_neighs,*neighs,*n_shared,**shared; 4864 PetscMPIInt size,rank,color; 4865 PetscInt *xadj,*adjncy; 4866 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4867 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4868 PetscInt void_procs,*procs_candidates = NULL; 4869 PetscInt xadj_count, *count; 4870 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4871 PetscSubcomm psubcomm; 4872 MPI_Comm subcomm; 4873 PetscErrorCode ierr; 4874 4875 PetscFunctionBegin; 4876 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4877 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4878 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4879 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4880 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4881 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4882 4883 if (have_void) *have_void = PETSC_FALSE; 4884 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4885 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4886 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4887 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4888 im_active = !!(n); 4889 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4890 void_procs = size - active_procs; 4891 /* get ranks of of non-active processes in mat communicator */ 4892 if (void_procs) { 4893 PetscInt ncand; 4894 4895 if (have_void) *have_void = PETSC_TRUE; 4896 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4897 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4898 for (i=0,ncand=0;i<size;i++) { 4899 if (!procs_candidates[i]) { 4900 procs_candidates[ncand++] = i; 4901 } 4902 } 4903 /* force n_subdomains to be not greater that the number of non-active processes */ 4904 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4905 } 4906 4907 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4908 if (active_procs < *n_subdomains) { 4909 PetscInt issize,isidx; 4910 if (im_active) { 4911 issize = 1; 4912 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4913 isidx = procs_candidates[rank]; 4914 } else { 4915 isidx = rank; 4916 } 4917 } else { 4918 issize = 0; 4919 isidx = -1; 4920 } 4921 *n_subdomains = active_procs; 4922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4923 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4924 PetscFunctionReturn(0); 4925 } 4926 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4927 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4928 threshold = PetscMax(threshold,2); 4929 4930 /* Get info on mapping */ 4931 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4932 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4933 4934 /* build local CSR graph of subdomains' connectivity */ 4935 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4936 xadj[0] = 0; 4937 xadj[1] = PetscMax(n_neighs-1,0); 4938 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4939 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4940 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4941 for (i=1;i<n_neighs;i++) 4942 for (j=0;j<n_shared[i];j++) 4943 count[shared[i][j]] += 1; 4944 4945 xadj_count = 0; 4946 for (i=1;i<n_neighs;i++) { 4947 for (j=0;j<n_shared[i];j++) { 4948 if (count[shared[i][j]] < threshold) { 4949 adjncy[xadj_count] = neighs[i]; 4950 adjncy_wgt[xadj_count] = n_shared[i]; 4951 xadj_count++; 4952 break; 4953 } 4954 } 4955 } 4956 xadj[1] = xadj_count; 4957 ierr = PetscFree(count);CHKERRQ(ierr); 4958 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4959 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4960 4961 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4962 4963 /* Restrict work on active processes only */ 4964 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4965 if (void_procs) { 4966 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4967 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4968 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4969 subcomm = PetscSubcommChild(psubcomm); 4970 } else { 4971 psubcomm = NULL; 4972 subcomm = PetscObjectComm((PetscObject)mat); 4973 } 4974 4975 v_wgt = NULL; 4976 if (!color) { 4977 ierr = PetscFree(xadj);CHKERRQ(ierr); 4978 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4979 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4980 } else { 4981 Mat subdomain_adj; 4982 IS new_ranks,new_ranks_contig; 4983 MatPartitioning partitioner; 4984 PetscInt rstart=0,rend=0; 4985 PetscInt *is_indices,*oldranks; 4986 PetscMPIInt size; 4987 PetscBool aggregate; 4988 4989 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4990 if (void_procs) { 4991 PetscInt prank = rank; 4992 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4993 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4994 for (i=0;i<xadj[1];i++) { 4995 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4996 } 4997 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4998 } else { 4999 oldranks = NULL; 5000 } 5001 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5002 if (aggregate) { /* TODO: all this part could be made more efficient */ 5003 PetscInt lrows,row,ncols,*cols; 5004 PetscMPIInt nrank; 5005 PetscScalar *vals; 5006 5007 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5008 lrows = 0; 5009 if (nrank<redprocs) { 5010 lrows = size/redprocs; 5011 if (nrank<size%redprocs) lrows++; 5012 } 5013 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5014 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5015 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5016 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5017 row = nrank; 5018 ncols = xadj[1]-xadj[0]; 5019 cols = adjncy; 5020 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5021 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5022 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5023 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5024 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5025 ierr = PetscFree(xadj);CHKERRQ(ierr); 5026 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5027 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5028 ierr = PetscFree(vals);CHKERRQ(ierr); 5029 if (use_vwgt) { 5030 Vec v; 5031 const PetscScalar *array; 5032 PetscInt nl; 5033 5034 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5035 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5036 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5037 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5038 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5039 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5040 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5041 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5042 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5043 ierr = VecDestroy(&v);CHKERRQ(ierr); 5044 } 5045 } else { 5046 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5047 if (use_vwgt) { 5048 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5049 v_wgt[0] = local_size; 5050 } 5051 } 5052 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5053 5054 /* Partition */ 5055 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5056 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5057 if (v_wgt) { 5058 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5059 } 5060 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5061 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5062 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5063 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5064 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5065 5066 /* renumber new_ranks to avoid "holes" in new set of processors */ 5067 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5068 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5069 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5070 if (!aggregate) { 5071 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5072 #if defined(PETSC_USE_DEBUG) 5073 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5074 #endif 5075 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5076 } else if (oldranks) { 5077 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5078 } else { 5079 ranks_send_to_idx[0] = is_indices[0]; 5080 } 5081 } else { 5082 PetscInt idxs[1]; 5083 PetscMPIInt tag; 5084 MPI_Request *reqs; 5085 5086 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5087 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5088 for (i=rstart;i<rend;i++) { 5089 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5090 } 5091 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5092 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5093 ierr = PetscFree(reqs);CHKERRQ(ierr); 5094 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5095 #if defined(PETSC_USE_DEBUG) 5096 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5097 #endif 5098 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5099 } else if (oldranks) { 5100 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5101 } else { 5102 ranks_send_to_idx[0] = idxs[0]; 5103 } 5104 } 5105 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5106 /* clean up */ 5107 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5108 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5109 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5110 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5111 } 5112 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5113 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5114 5115 /* assemble parallel IS for sends */ 5116 i = 1; 5117 if (!color) i=0; 5118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5119 PetscFunctionReturn(0); 5120 } 5121 5122 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5123 5124 #undef __FUNCT__ 5125 #define __FUNCT__ "MatISSubassemble" 5126 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5127 { 5128 Mat local_mat; 5129 IS is_sends_internal; 5130 PetscInt rows,cols,new_local_rows; 5131 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5132 PetscBool ismatis,isdense,newisdense,destroy_mat; 5133 ISLocalToGlobalMapping l2gmap; 5134 PetscInt* l2gmap_indices; 5135 const PetscInt* is_indices; 5136 MatType new_local_type; 5137 /* buffers */ 5138 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5139 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5140 PetscInt *recv_buffer_idxs_local; 5141 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5142 /* MPI */ 5143 MPI_Comm comm,comm_n; 5144 PetscSubcomm subcomm; 5145 PetscMPIInt n_sends,n_recvs,commsize; 5146 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5147 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5148 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5149 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5150 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5151 PetscErrorCode ierr; 5152 5153 PetscFunctionBegin; 5154 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5155 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5156 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5157 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5158 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5159 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5160 PetscValidLogicalCollectiveBool(mat,reuse,6); 5161 PetscValidLogicalCollectiveInt(mat,nis,8); 5162 5163 /* further checks */ 5164 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5165 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5166 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5167 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5168 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5169 if (reuse && *mat_n) { 5170 PetscInt mrows,mcols,mnrows,mncols; 5171 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5172 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5173 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5174 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5175 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5176 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5177 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5178 } 5179 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5180 PetscValidLogicalCollectiveInt(mat,bs,0); 5181 5182 /* prepare IS for sending if not provided */ 5183 if (!is_sends) { 5184 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5185 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5186 } else { 5187 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5188 is_sends_internal = is_sends; 5189 } 5190 5191 /* get comm */ 5192 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5193 5194 /* compute number of sends */ 5195 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5196 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5197 5198 /* compute number of receives */ 5199 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5201 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5202 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5203 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5204 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5205 ierr = PetscFree(iflags);CHKERRQ(ierr); 5206 5207 /* restrict comm if requested */ 5208 subcomm = 0; 5209 destroy_mat = PETSC_FALSE; 5210 if (restrict_comm) { 5211 PetscMPIInt color,subcommsize; 5212 5213 color = 0; 5214 if (restrict_full) { 5215 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5216 } else { 5217 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5218 } 5219 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5220 subcommsize = commsize - subcommsize; 5221 /* check if reuse has been requested */ 5222 if (reuse) { 5223 if (*mat_n) { 5224 PetscMPIInt subcommsize2; 5225 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5226 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5227 comm_n = PetscObjectComm((PetscObject)*mat_n); 5228 } else { 5229 comm_n = PETSC_COMM_SELF; 5230 } 5231 } else { /* MAT_INITIAL_MATRIX */ 5232 PetscMPIInt rank; 5233 5234 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5235 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5236 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5237 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5238 comm_n = PetscSubcommChild(subcomm); 5239 } 5240 /* flag to destroy *mat_n if not significative */ 5241 if (color) destroy_mat = PETSC_TRUE; 5242 } else { 5243 comm_n = comm; 5244 } 5245 5246 /* prepare send/receive buffers */ 5247 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5248 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5249 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5250 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5251 if (nis) { 5252 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5253 } 5254 5255 /* Get data from local matrices */ 5256 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5257 /* TODO: See below some guidelines on how to prepare the local buffers */ 5258 /* 5259 send_buffer_vals should contain the raw values of the local matrix 5260 send_buffer_idxs should contain: 5261 - MatType_PRIVATE type 5262 - PetscInt size_of_l2gmap 5263 - PetscInt global_row_indices[size_of_l2gmap] 5264 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5265 */ 5266 else { 5267 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5268 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5269 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5270 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5271 send_buffer_idxs[1] = i; 5272 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5273 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5274 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5275 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5276 for (i=0;i<n_sends;i++) { 5277 ilengths_vals[is_indices[i]] = len*len; 5278 ilengths_idxs[is_indices[i]] = len+2; 5279 } 5280 } 5281 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5282 /* additional is (if any) */ 5283 if (nis) { 5284 PetscMPIInt psum; 5285 PetscInt j; 5286 for (j=0,psum=0;j<nis;j++) { 5287 PetscInt plen; 5288 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5289 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5290 psum += len+1; /* indices + lenght */ 5291 } 5292 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5293 for (j=0,psum=0;j<nis;j++) { 5294 PetscInt plen; 5295 const PetscInt *is_array_idxs; 5296 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5297 send_buffer_idxs_is[psum] = plen; 5298 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5299 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5300 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5301 psum += plen+1; /* indices + lenght */ 5302 } 5303 for (i=0;i<n_sends;i++) { 5304 ilengths_idxs_is[is_indices[i]] = psum; 5305 } 5306 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5307 } 5308 5309 buf_size_idxs = 0; 5310 buf_size_vals = 0; 5311 buf_size_idxs_is = 0; 5312 for (i=0;i<n_recvs;i++) { 5313 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5314 buf_size_vals += (PetscInt)olengths_vals[i]; 5315 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5316 } 5317 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5318 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5319 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5320 5321 /* get new tags for clean communications */ 5322 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5323 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5324 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5325 5326 /* allocate for requests */ 5327 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5328 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5329 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5331 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5332 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5333 5334 /* communications */ 5335 ptr_idxs = recv_buffer_idxs; 5336 ptr_vals = recv_buffer_vals; 5337 ptr_idxs_is = recv_buffer_idxs_is; 5338 for (i=0;i<n_recvs;i++) { 5339 source_dest = onodes[i]; 5340 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5341 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5342 ptr_idxs += olengths_idxs[i]; 5343 ptr_vals += olengths_vals[i]; 5344 if (nis) { 5345 source_dest = onodes_is[i]; 5346 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5347 ptr_idxs_is += olengths_idxs_is[i]; 5348 } 5349 } 5350 for (i=0;i<n_sends;i++) { 5351 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5352 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5353 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5354 if (nis) { 5355 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5356 } 5357 } 5358 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5359 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5360 5361 /* assemble new l2g map */ 5362 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5363 ptr_idxs = recv_buffer_idxs; 5364 new_local_rows = 0; 5365 for (i=0;i<n_recvs;i++) { 5366 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5367 ptr_idxs += olengths_idxs[i]; 5368 } 5369 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5370 ptr_idxs = recv_buffer_idxs; 5371 new_local_rows = 0; 5372 for (i=0;i<n_recvs;i++) { 5373 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5374 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5375 ptr_idxs += olengths_idxs[i]; 5376 } 5377 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5378 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5379 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5380 5381 /* infer new local matrix type from received local matrices type */ 5382 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5383 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5384 if (n_recvs) { 5385 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5386 ptr_idxs = recv_buffer_idxs; 5387 for (i=0;i<n_recvs;i++) { 5388 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5389 new_local_type_private = MATAIJ_PRIVATE; 5390 break; 5391 } 5392 ptr_idxs += olengths_idxs[i]; 5393 } 5394 switch (new_local_type_private) { 5395 case MATDENSE_PRIVATE: 5396 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5397 new_local_type = MATSEQAIJ; 5398 bs = 1; 5399 } else { /* if I receive only 1 dense matrix */ 5400 new_local_type = MATSEQDENSE; 5401 bs = 1; 5402 } 5403 break; 5404 case MATAIJ_PRIVATE: 5405 new_local_type = MATSEQAIJ; 5406 bs = 1; 5407 break; 5408 case MATBAIJ_PRIVATE: 5409 new_local_type = MATSEQBAIJ; 5410 break; 5411 case MATSBAIJ_PRIVATE: 5412 new_local_type = MATSEQSBAIJ; 5413 break; 5414 default: 5415 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5416 break; 5417 } 5418 } else { /* by default, new_local_type is seqdense */ 5419 new_local_type = MATSEQDENSE; 5420 bs = 1; 5421 } 5422 5423 /* create MATIS object if needed */ 5424 if (!reuse) { 5425 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5426 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5427 } else { 5428 /* it also destroys the local matrices */ 5429 if (*mat_n) { 5430 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5431 } else { /* this is a fake object */ 5432 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5433 } 5434 } 5435 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5436 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5437 5438 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5439 5440 /* Global to local map of received indices */ 5441 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5442 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5443 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5444 5445 /* restore attributes -> type of incoming data and its size */ 5446 buf_size_idxs = 0; 5447 for (i=0;i<n_recvs;i++) { 5448 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5449 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5450 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5451 } 5452 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5453 5454 /* set preallocation */ 5455 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5456 if (!newisdense) { 5457 PetscInt *new_local_nnz=0; 5458 5459 ptr_idxs = recv_buffer_idxs_local; 5460 if (n_recvs) { 5461 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5462 } 5463 for (i=0;i<n_recvs;i++) { 5464 PetscInt j; 5465 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5466 for (j=0;j<*(ptr_idxs+1);j++) { 5467 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5468 } 5469 } else { 5470 /* TODO */ 5471 } 5472 ptr_idxs += olengths_idxs[i]; 5473 } 5474 if (new_local_nnz) { 5475 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5476 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5477 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5478 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5479 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5480 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5481 } else { 5482 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5483 } 5484 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5485 } else { 5486 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5487 } 5488 5489 /* set values */ 5490 ptr_vals = recv_buffer_vals; 5491 ptr_idxs = recv_buffer_idxs_local; 5492 for (i=0;i<n_recvs;i++) { 5493 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5494 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5495 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5496 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5497 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5498 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5499 } else { 5500 /* TODO */ 5501 } 5502 ptr_idxs += olengths_idxs[i]; 5503 ptr_vals += olengths_vals[i]; 5504 } 5505 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5507 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5509 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5510 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5511 5512 #if 0 5513 if (!restrict_comm) { /* check */ 5514 Vec lvec,rvec; 5515 PetscReal infty_error; 5516 5517 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5518 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5519 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5520 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5521 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5522 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5523 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5524 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5525 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5526 } 5527 #endif 5528 5529 /* assemble new additional is (if any) */ 5530 if (nis) { 5531 PetscInt **temp_idxs,*count_is,j,psum; 5532 5533 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5534 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5535 ptr_idxs = recv_buffer_idxs_is; 5536 psum = 0; 5537 for (i=0;i<n_recvs;i++) { 5538 for (j=0;j<nis;j++) { 5539 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5540 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5541 psum += plen; 5542 ptr_idxs += plen+1; /* shift pointer to received data */ 5543 } 5544 } 5545 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5546 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5547 for (i=1;i<nis;i++) { 5548 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5549 } 5550 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5551 ptr_idxs = recv_buffer_idxs_is; 5552 for (i=0;i<n_recvs;i++) { 5553 for (j=0;j<nis;j++) { 5554 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5555 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5556 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5557 ptr_idxs += plen+1; /* shift pointer to received data */ 5558 } 5559 } 5560 for (i=0;i<nis;i++) { 5561 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5562 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5563 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5564 } 5565 ierr = PetscFree(count_is);CHKERRQ(ierr); 5566 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5567 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5568 } 5569 /* free workspace */ 5570 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5571 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5572 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5573 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5574 if (isdense) { 5575 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5576 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5577 } else { 5578 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5579 } 5580 if (nis) { 5581 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5582 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5583 } 5584 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5585 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5586 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5587 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5588 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5589 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5590 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5591 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5592 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5593 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5594 ierr = PetscFree(onodes);CHKERRQ(ierr); 5595 if (nis) { 5596 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5597 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5598 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5599 } 5600 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5601 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5602 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5603 for (i=0;i<nis;i++) { 5604 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5605 } 5606 *mat_n = NULL; 5607 } 5608 PetscFunctionReturn(0); 5609 } 5610 5611 /* temporary hack into ksp private data structure */ 5612 #include <petsc/private/kspimpl.h> 5613 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5616 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5617 { 5618 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5619 PC_IS *pcis = (PC_IS*)pc->data; 5620 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5621 MatNullSpace CoarseNullSpace=NULL; 5622 ISLocalToGlobalMapping coarse_islg; 5623 IS coarse_is,*isarray; 5624 PetscInt i,im_active=-1,active_procs=-1; 5625 PetscInt nis,nisdofs,nisneu,nisvert; 5626 PC pc_temp; 5627 PCType coarse_pc_type; 5628 KSPType coarse_ksp_type; 5629 PetscBool multilevel_requested,multilevel_allowed; 5630 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5631 Mat t_coarse_mat_is; 5632 PetscInt ncoarse; 5633 PetscBool compute_vecs = PETSC_FALSE; 5634 PetscScalar *array; 5635 MatReuse coarse_mat_reuse; 5636 PetscBool restr, full_restr, have_void; 5637 PetscErrorCode ierr; 5638 5639 PetscFunctionBegin; 5640 /* Assign global numbering to coarse dofs */ 5641 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5642 PetscInt ocoarse_size; 5643 compute_vecs = PETSC_TRUE; 5644 ocoarse_size = pcbddc->coarse_size; 5645 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5646 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5647 /* see if we can avoid some work */ 5648 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5649 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5650 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5651 PC pc; 5652 PetscBool isbddc; 5653 5654 /* temporary workaround since PCBDDC does not have a reset method so far */ 5655 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5656 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5657 if (isbddc) { 5658 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5659 } else { 5660 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5661 } 5662 coarse_reuse = PETSC_FALSE; 5663 } else { /* we can safely reuse already computed coarse matrix */ 5664 coarse_reuse = PETSC_TRUE; 5665 } 5666 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5667 coarse_reuse = PETSC_FALSE; 5668 } 5669 /* reset any subassembling information */ 5670 if (!coarse_reuse || pcbddc->recompute_topography) { 5671 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5672 } 5673 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5674 coarse_reuse = PETSC_TRUE; 5675 } 5676 /* assemble coarse matrix */ 5677 if (coarse_reuse && pcbddc->coarse_ksp) { 5678 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5679 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5680 coarse_mat_reuse = MAT_REUSE_MATRIX; 5681 } else { 5682 coarse_mat = NULL; 5683 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5684 } 5685 5686 /* creates temporary l2gmap and IS for coarse indexes */ 5687 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5688 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5689 5690 /* creates temporary MATIS object for coarse matrix */ 5691 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5692 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5693 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5694 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5695 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5696 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5697 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5700 5701 /* count "active" (i.e. with positive local size) and "void" processes */ 5702 im_active = !!(pcis->n); 5703 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5704 5705 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5706 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5707 /* full_restr : just use the receivers from the subassembling pattern */ 5708 coarse_mat_is = NULL; 5709 multilevel_allowed = PETSC_FALSE; 5710 multilevel_requested = PETSC_FALSE; 5711 full_restr = PETSC_TRUE; 5712 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5713 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5714 if (multilevel_requested) { 5715 ncoarse = active_procs/pcbddc->coarsening_ratio; 5716 restr = PETSC_FALSE; 5717 full_restr = PETSC_FALSE; 5718 } else { 5719 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5720 restr = PETSC_TRUE; 5721 full_restr = PETSC_TRUE; 5722 } 5723 ncoarse = PetscMax(1,ncoarse); 5724 if (!pcbddc->coarse_subassembling) { 5725 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5726 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5727 PetscInt psum; 5728 PetscMPIInt size; 5729 if (pcbddc->coarse_ksp) psum = 1; 5730 else psum = 0; 5731 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5732 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5733 if (ncoarse < size) have_void = PETSC_TRUE; 5734 } 5735 /* determine if we can go multilevel */ 5736 if (multilevel_requested) { 5737 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5738 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5739 } 5740 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5741 5742 /* dump subassembling pattern */ 5743 if (pcbddc->dbg_flag && multilevel_allowed) { 5744 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5745 } 5746 5747 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5748 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5749 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5750 const PetscInt *idxs; 5751 ISLocalToGlobalMapping tmap; 5752 5753 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5754 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5755 /* allocate space for temporary storage */ 5756 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5757 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5758 /* allocate for IS array */ 5759 nisdofs = pcbddc->n_ISForDofsLocal; 5760 nisneu = !!pcbddc->NeumannBoundariesLocal; 5761 nisvert = 0; /* nisvert is not used */ 5762 nis = nisdofs + nisneu + nisvert; 5763 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5764 /* dofs splitting */ 5765 for (i=0;i<nisdofs;i++) { 5766 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5767 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5768 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5769 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5770 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5771 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5772 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5773 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5774 } 5775 /* neumann boundaries */ 5776 if (pcbddc->NeumannBoundariesLocal) { 5777 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5778 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5779 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5780 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5781 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5782 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5783 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5784 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5785 } 5786 /* free memory */ 5787 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5788 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5789 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5790 } else { 5791 nis = 0; 5792 nisdofs = 0; 5793 nisneu = 0; 5794 nisvert = 0; 5795 isarray = NULL; 5796 } 5797 /* destroy no longer needed map */ 5798 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5799 5800 /* subassemble */ 5801 if (multilevel_allowed) { 5802 PetscBool reuse,reuser; 5803 if (coarse_mat) reuse = PETSC_TRUE; 5804 else reuse = PETSC_FALSE; 5805 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5806 if (reuser) { 5807 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5808 } else { 5809 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5810 } 5811 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5812 } else { 5813 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5814 } 5815 if (coarse_mat_is || coarse_mat) { 5816 PetscMPIInt size; 5817 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5818 if (!multilevel_allowed) { 5819 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5820 } else { 5821 Mat A; 5822 5823 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5824 if (coarse_mat_is) { 5825 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5826 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5827 coarse_mat = coarse_mat_is; 5828 } 5829 /* be sure we don't have MatSeqDENSE as local mat */ 5830 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5831 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5832 } 5833 } 5834 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5835 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5836 5837 /* create local to global scatters for coarse problem */ 5838 if (compute_vecs) { 5839 PetscInt lrows; 5840 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5841 if (coarse_mat) { 5842 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5843 } else { 5844 lrows = 0; 5845 } 5846 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5847 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5848 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5849 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5850 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5851 } 5852 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5853 5854 /* set defaults for coarse KSP and PC */ 5855 if (multilevel_allowed) { 5856 coarse_ksp_type = KSPRICHARDSON; 5857 coarse_pc_type = PCBDDC; 5858 } else { 5859 coarse_ksp_type = KSPPREONLY; 5860 coarse_pc_type = PCREDUNDANT; 5861 } 5862 5863 /* print some info if requested */ 5864 if (pcbddc->dbg_flag) { 5865 if (!multilevel_allowed) { 5866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5867 if (multilevel_requested) { 5868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5869 } else if (pcbddc->max_levels) { 5870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5871 } 5872 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5873 } 5874 } 5875 5876 /* create the coarse KSP object only once with defaults */ 5877 if (coarse_mat) { 5878 PetscViewer dbg_viewer = NULL; 5879 if (pcbddc->dbg_flag) { 5880 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5881 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5882 } 5883 if (!pcbddc->coarse_ksp) { 5884 char prefix[256],str_level[16]; 5885 size_t len; 5886 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5887 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5888 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5889 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5890 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5891 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5892 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5893 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5894 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5895 /* prefix */ 5896 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5897 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5898 if (!pcbddc->current_level) { 5899 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5900 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5901 } else { 5902 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5903 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5904 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5905 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5906 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5907 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5908 } 5909 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5910 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5911 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5912 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5913 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5914 /* allow user customization */ 5915 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5916 } 5917 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5918 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5919 if (nisdofs) { 5920 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5921 for (i=0;i<nisdofs;i++) { 5922 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5923 } 5924 } 5925 if (nisneu) { 5926 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5927 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5928 } 5929 if (nisvert) { 5930 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5931 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5932 } 5933 5934 /* get some info after set from options */ 5935 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5936 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5937 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5938 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5939 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5940 isbddc = PETSC_FALSE; 5941 } 5942 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5943 if (isredundant) { 5944 KSP inner_ksp; 5945 PC inner_pc; 5946 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5947 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5948 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5949 } 5950 5951 /* parameters which miss an API */ 5952 if (isbddc) { 5953 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5954 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5955 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5956 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5957 if (pcbddc_coarse->benign_saddle_point) { 5958 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5959 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5960 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5961 } 5962 } 5963 5964 /* propagate symmetry info of coarse matrix */ 5965 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5966 if (pc->pmat->symmetric_set) { 5967 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5968 } 5969 if (pc->pmat->hermitian_set) { 5970 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5971 } 5972 if (pc->pmat->spd_set) { 5973 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5974 } 5975 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5976 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5977 } 5978 /* set operators */ 5979 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5980 if (pcbddc->dbg_flag) { 5981 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5982 } 5983 } 5984 ierr = PetscFree(isarray);CHKERRQ(ierr); 5985 #if 0 5986 { 5987 PetscViewer viewer; 5988 char filename[256]; 5989 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5990 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5992 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5993 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5994 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5995 } 5996 #endif 5997 5998 /* Compute coarse null space (special handling by BDDC only) */ 5999 #if 0 6000 if (pcbddc->NullSpace) { 6001 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 6002 } 6003 #endif 6004 /* hack */ 6005 if (pcbddc->coarse_ksp) { 6006 Vec crhs,csol; 6007 6008 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6009 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6010 if (!csol) { 6011 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6012 } 6013 if (!crhs) { 6014 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6015 } 6016 } 6017 6018 /* compute null space for coarse solver if the benign trick has been requested */ 6019 if (pcbddc->benign_null) { 6020 6021 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6022 for (i=0;i<pcbddc->benign_n;i++) { 6023 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6024 } 6025 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6026 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6027 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6028 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6029 if (coarse_mat) { 6030 Vec nullv; 6031 PetscScalar *array,*array2; 6032 PetscInt nl; 6033 6034 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6035 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6036 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6037 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6038 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6039 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6040 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6041 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6042 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6043 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6044 } 6045 } 6046 6047 if (pcbddc->coarse_ksp) { 6048 PetscBool ispreonly; 6049 6050 if (CoarseNullSpace) { 6051 PetscBool isnull; 6052 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6053 if (0) { 6054 if (isbddc && !pcbddc->benign_saddle_point) { 6055 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 6056 } else { 6057 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6058 } 6059 } else { 6060 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6061 } 6062 } 6063 /* setup coarse ksp */ 6064 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6065 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6066 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6067 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6068 KSP check_ksp; 6069 KSPType check_ksp_type; 6070 PC check_pc; 6071 Vec check_vec,coarse_vec; 6072 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6073 PetscInt its; 6074 PetscBool compute_eigs; 6075 PetscReal *eigs_r,*eigs_c; 6076 PetscInt neigs; 6077 const char *prefix; 6078 6079 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6080 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6081 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6082 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6083 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6084 /* prevent from setup unneeded object */ 6085 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6086 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6087 if (ispreonly) { 6088 check_ksp_type = KSPPREONLY; 6089 compute_eigs = PETSC_FALSE; 6090 } else { 6091 check_ksp_type = KSPGMRES; 6092 compute_eigs = PETSC_TRUE; 6093 } 6094 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6095 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6096 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6097 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6098 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6099 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6100 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6101 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6102 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6103 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6104 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6105 /* create random vec */ 6106 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6107 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6108 if (CoarseNullSpace) { 6109 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 6110 } 6111 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6112 /* solve coarse problem */ 6113 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6114 if (CoarseNullSpace) { 6115 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 6116 } 6117 /* set eigenvalue estimation if preonly has not been requested */ 6118 if (compute_eigs) { 6119 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6120 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6121 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6122 lambda_max = eigs_r[neigs-1]; 6123 lambda_min = eigs_r[0]; 6124 if (pcbddc->use_coarse_estimates) { 6125 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6126 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6127 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6128 } 6129 } 6130 } 6131 6132 /* check coarse problem residual error */ 6133 if (pcbddc->dbg_flag) { 6134 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6135 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6136 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6137 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6138 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6139 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6140 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6141 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6142 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6143 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6144 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6145 if (CoarseNullSpace) { 6146 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6147 } 6148 if (compute_eigs) { 6149 PetscReal lambda_max_s,lambda_min_s; 6150 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6151 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6152 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6153 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6154 for (i=0;i<neigs;i++) { 6155 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6156 } 6157 } 6158 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6159 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6160 } 6161 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6162 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6163 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6164 if (compute_eigs) { 6165 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6166 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6167 } 6168 } 6169 } 6170 /* print additional info */ 6171 if (pcbddc->dbg_flag) { 6172 /* waits until all processes reaches this point */ 6173 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6176 } 6177 6178 /* free memory */ 6179 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6180 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6181 PetscFunctionReturn(0); 6182 } 6183 6184 #undef __FUNCT__ 6185 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6186 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6187 { 6188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6189 PC_IS* pcis = (PC_IS*)pc->data; 6190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6191 IS subset,subset_mult,subset_n; 6192 PetscInt local_size,coarse_size=0; 6193 PetscInt *local_primal_indices=NULL; 6194 const PetscInt *t_local_primal_indices; 6195 PetscErrorCode ierr; 6196 6197 PetscFunctionBegin; 6198 /* Compute global number of coarse dofs */ 6199 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6200 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6201 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6202 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6203 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6204 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6205 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6206 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6207 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6208 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6209 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6210 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6211 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6212 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6213 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6214 6215 /* check numbering */ 6216 if (pcbddc->dbg_flag) { 6217 PetscScalar coarsesum,*array,*array2; 6218 PetscInt i; 6219 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6220 6221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6222 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6223 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6224 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6225 /* counter */ 6226 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6227 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6228 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6229 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6230 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6231 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6232 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6233 for (i=0;i<pcbddc->local_primal_size;i++) { 6234 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6235 } 6236 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6237 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6238 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6239 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6240 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6241 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6242 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6243 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6244 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6245 for (i=0;i<pcis->n;i++) { 6246 if (array[i] != 0.0 && array[i] != array2[i]) { 6247 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6248 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6249 set_error = PETSC_TRUE; 6250 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6252 } 6253 } 6254 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6255 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6257 for (i=0;i<pcis->n;i++) { 6258 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6259 } 6260 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6261 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6262 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6263 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6264 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6266 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6267 PetscInt *gidxs; 6268 6269 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6270 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6274 for (i=0;i<pcbddc->local_primal_size;i++) { 6275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6276 } 6277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6278 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6279 } 6280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6281 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6282 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6283 } 6284 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6285 /* get back data */ 6286 *coarse_size_n = coarse_size; 6287 *local_primal_indices_n = local_primal_indices; 6288 PetscFunctionReturn(0); 6289 } 6290 6291 #undef __FUNCT__ 6292 #define __FUNCT__ "PCBDDCGlobalToLocal" 6293 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6294 { 6295 IS localis_t; 6296 PetscInt i,lsize,*idxs,n; 6297 PetscScalar *vals; 6298 PetscErrorCode ierr; 6299 6300 PetscFunctionBegin; 6301 /* get indices in local ordering exploiting local to global map */ 6302 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6303 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6304 for (i=0;i<lsize;i++) vals[i] = 1.0; 6305 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6306 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6307 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6308 if (idxs) { /* multilevel guard */ 6309 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6310 } 6311 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6312 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6313 ierr = PetscFree(vals);CHKERRQ(ierr); 6314 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6315 /* now compute set in local ordering */ 6316 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6317 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6318 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6319 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6320 for (i=0,lsize=0;i<n;i++) { 6321 if (PetscRealPart(vals[i]) > 0.5) { 6322 lsize++; 6323 } 6324 } 6325 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6326 for (i=0,lsize=0;i<n;i++) { 6327 if (PetscRealPart(vals[i]) > 0.5) { 6328 idxs[lsize++] = i; 6329 } 6330 } 6331 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6332 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6333 *localis = localis_t; 6334 PetscFunctionReturn(0); 6335 } 6336 6337 #undef __FUNCT__ 6338 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6339 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6340 { 6341 PC_IS *pcis=(PC_IS*)pc->data; 6342 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6343 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6344 Mat S_j; 6345 PetscInt *used_xadj,*used_adjncy; 6346 PetscBool free_used_adj; 6347 PetscErrorCode ierr; 6348 6349 PetscFunctionBegin; 6350 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6351 free_used_adj = PETSC_FALSE; 6352 if (pcbddc->sub_schurs_layers == -1) { 6353 used_xadj = NULL; 6354 used_adjncy = NULL; 6355 } else { 6356 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6357 used_xadj = pcbddc->mat_graph->xadj; 6358 used_adjncy = pcbddc->mat_graph->adjncy; 6359 } else if (pcbddc->computed_rowadj) { 6360 used_xadj = pcbddc->mat_graph->xadj; 6361 used_adjncy = pcbddc->mat_graph->adjncy; 6362 } else { 6363 PetscBool flg_row=PETSC_FALSE; 6364 const PetscInt *xadj,*adjncy; 6365 PetscInt nvtxs; 6366 6367 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6368 if (flg_row) { 6369 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6370 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6371 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6372 free_used_adj = PETSC_TRUE; 6373 } else { 6374 pcbddc->sub_schurs_layers = -1; 6375 used_xadj = NULL; 6376 used_adjncy = NULL; 6377 } 6378 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6379 } 6380 } 6381 6382 /* setup sub_schurs data */ 6383 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6384 if (!sub_schurs->schur_explicit) { 6385 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6386 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6387 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6388 } else { 6389 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6390 PetscBool isseqaij,need_change = PETSC_FALSE;; 6391 PetscInt benign_n; 6392 Mat change = NULL; 6393 Vec scaling = NULL; 6394 IS change_primal = NULL; 6395 6396 if (!pcbddc->use_vertices && reuse_solvers) { 6397 PetscInt n_vertices; 6398 6399 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6400 reuse_solvers = (PetscBool)!n_vertices; 6401 } 6402 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6403 if (!isseqaij) { 6404 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6405 if (matis->A == pcbddc->local_mat) { 6406 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6407 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6408 } else { 6409 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6410 } 6411 } 6412 if (!pcbddc->benign_change_explicit) { 6413 benign_n = pcbddc->benign_n; 6414 } else { 6415 benign_n = 0; 6416 } 6417 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6418 We need a global reduction to avoid possible deadlocks. 6419 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6420 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6421 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6422 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6423 need_change = (PetscBool)(!need_change); 6424 } 6425 /* If the user defines additional constraints, we import them here. 6426 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6427 if (need_change) { 6428 PC_IS *pcisf; 6429 PC_BDDC *pcbddcf; 6430 PC pcf; 6431 6432 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6433 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6434 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6435 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6436 /* hacks */ 6437 pcisf = (PC_IS*)pcf->data; 6438 pcisf->is_B_local = pcis->is_B_local; 6439 pcisf->vec1_N = pcis->vec1_N; 6440 pcisf->BtoNmap = pcis->BtoNmap; 6441 pcisf->n = pcis->n; 6442 pcisf->n_B = pcis->n_B; 6443 pcbddcf = (PC_BDDC*)pcf->data; 6444 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6445 pcbddcf->mat_graph = pcbddc->mat_graph; 6446 pcbddcf->use_faces = PETSC_TRUE; 6447 pcbddcf->use_change_of_basis = PETSC_TRUE; 6448 pcbddcf->use_change_on_faces = PETSC_TRUE; 6449 pcbddcf->use_qr_single = PETSC_TRUE; 6450 pcbddcf->fake_change = PETSC_TRUE; 6451 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6452 /* store information on primal vertices and change of basis (in local numbering) */ 6453 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6454 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6455 change = pcbddcf->ConstraintMatrix; 6456 pcbddcf->ConstraintMatrix = NULL; 6457 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6458 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6459 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6460 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6461 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6462 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6463 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6464 pcf->ops->destroy = NULL; 6465 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6466 } 6467 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6468 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6469 ierr = MatDestroy(&change);CHKERRQ(ierr); 6470 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6471 } 6472 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6473 6474 /* free adjacency */ 6475 if (free_used_adj) { 6476 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6477 } 6478 PetscFunctionReturn(0); 6479 } 6480 6481 #undef __FUNCT__ 6482 #define __FUNCT__ "PCBDDCInitSubSchurs" 6483 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6484 { 6485 PC_IS *pcis=(PC_IS*)pc->data; 6486 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6487 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6488 PCBDDCGraph graph; 6489 PetscErrorCode ierr; 6490 6491 PetscFunctionBegin; 6492 /* attach interface graph for determining subsets */ 6493 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6494 IS verticesIS,verticescomm; 6495 PetscInt vsize,*idxs; 6496 6497 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6498 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6499 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6500 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6501 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6502 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6503 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6504 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6505 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6506 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6507 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6508 } else { 6509 graph = pcbddc->mat_graph; 6510 } 6511 /* print some info */ 6512 if (pcbddc->dbg_flag) { 6513 IS vertices; 6514 PetscInt nv,nedges,nfaces; 6515 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6516 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6517 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6518 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6519 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6524 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6525 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6526 } 6527 6528 /* sub_schurs init */ 6529 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6530 6531 /* free graph struct */ 6532 if (pcbddc->sub_schurs_rebuild) { 6533 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6534 } 6535 PetscFunctionReturn(0); 6536 } 6537 6538 #undef __FUNCT__ 6539 #define __FUNCT__ "PCBDDCCheckOperator" 6540 PetscErrorCode PCBDDCCheckOperator(PC pc) 6541 { 6542 PC_IS *pcis=(PC_IS*)pc->data; 6543 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6544 PetscErrorCode ierr; 6545 6546 PetscFunctionBegin; 6547 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6548 IS zerodiag = NULL; 6549 Mat S_j,B0_B=NULL; 6550 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6551 PetscScalar *p0_check,*array,*array2; 6552 PetscReal norm; 6553 PetscInt i; 6554 6555 /* B0 and B0_B */ 6556 if (zerodiag) { 6557 IS dummy; 6558 6559 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6560 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6561 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6562 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6563 } 6564 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6565 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6566 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6567 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6568 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6569 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6570 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6571 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6572 /* S_j */ 6573 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6574 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6575 6576 /* mimic vector in \widetilde{W}_\Gamma */ 6577 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6578 /* continuous in primal space */ 6579 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6580 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6581 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6582 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6583 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6584 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6585 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6586 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6587 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6588 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6589 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6590 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6591 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6592 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6593 6594 /* assemble rhs for coarse problem */ 6595 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6596 /* local with Schur */ 6597 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6598 if (zerodiag) { 6599 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6600 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6601 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6602 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6603 } 6604 /* sum on primal nodes the local contributions */ 6605 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6606 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6607 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6608 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6609 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6610 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6611 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6612 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6613 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6614 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6615 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6616 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6617 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6618 /* scale primal nodes (BDDC sums contibutions) */ 6619 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6620 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6621 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6622 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6623 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6624 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6625 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6626 /* global: \widetilde{B0}_B w_\Gamma */ 6627 if (zerodiag) { 6628 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6629 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6630 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6631 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6632 } 6633 /* BDDC */ 6634 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6635 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6636 6637 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6638 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6639 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6640 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6641 for (i=0;i<pcbddc->benign_n;i++) { 6642 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6643 } 6644 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6645 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6646 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6647 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6648 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6649 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6650 } 6651 PetscFunctionReturn(0); 6652 } 6653