1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 36 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 37 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 38 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 39 } 40 } 41 } 42 43 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 44 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 45 } 46 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 51 } 52 ierr = VecDestroy(&global);CHKERRQ(ierr); 53 ierr = VecDestroy(&local);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 #undef __FUNCT__ 58 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 59 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 60 { 61 PC_IS *pcis = (PC_IS*)(pc->data); 62 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 if (!pcbddc->benign_have_null) { 67 PetscFunctionReturn(0); 68 } 69 if (pcbddc->ChangeOfBasisMatrix) { 70 Vec swap; 71 72 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 73 swap = pcbddc->work_change; 74 pcbddc->work_change = r; 75 r = swap; 76 } 77 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79 if (pcbddc->benign_n) { 80 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 81 } else { 82 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 83 } 84 ierr = VecSet(z,0.);CHKERRQ(ierr); 85 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 86 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 87 if (pcbddc->ChangeOfBasisMatrix) { 88 Vec swap; 89 90 swap = r; 91 r = pcbddc->work_change; 92 pcbddc->work_change = swap; 93 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 94 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 101 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 102 { 103 PCBDDCBenignMatMult_ctx ctx; 104 PetscErrorCode ierr; 105 PetscBool apply_right,apply_left,reset_x; 106 107 PetscFunctionBegin; 108 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 109 if (transpose) { 110 apply_right = ctx->apply_left; 111 apply_left = ctx->apply_right; 112 } else { 113 apply_right = ctx->apply_right; 114 apply_left = ctx->apply_left; 115 } 116 reset_x = PETSC_FALSE; 117 if (apply_right) { 118 const PetscScalar *ax; 119 PetscInt nl,i; 120 121 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 122 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 123 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 125 for (i=0;i<ctx->benign_n;i++) { 126 PetscScalar sum,val; 127 const PetscInt *idxs; 128 PetscInt nz,j; 129 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 130 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 131 sum = 0.; 132 if (ctx->apply_p0) { 133 val = ctx->work[idxs[nz-1]]; 134 for (j=0;j<nz-1;j++) { 135 sum += ctx->work[idxs[j]]; 136 ctx->work[idxs[j]] += val; 137 } 138 } else { 139 for (j=0;j<nz-1;j++) { 140 sum += ctx->work[idxs[j]]; 141 } 142 } 143 ctx->work[idxs[nz-1]] -= sum; 144 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 145 } 146 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 147 reset_x = PETSC_TRUE; 148 } 149 if (transpose) { 150 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 151 } else { 152 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 153 } 154 if (reset_x) { 155 ierr = VecResetArray(x);CHKERRQ(ierr); 156 } 157 if (apply_left) { 158 PetscScalar *ay; 159 PetscInt i; 160 161 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 162 for (i=0;i<ctx->benign_n;i++) { 163 PetscScalar sum,val; 164 const PetscInt *idxs; 165 PetscInt nz,j; 166 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 167 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 168 val = -ay[idxs[nz-1]]; 169 if (ctx->apply_p0) { 170 sum = 0.; 171 for (j=0;j<nz-1;j++) { 172 sum += ay[idxs[j]]; 173 ay[idxs[j]] += val; 174 } 175 ay[idxs[nz-1]] += sum; 176 } else { 177 for (j=0;j<nz-1;j++) { 178 ay[idxs[j]] += val; 179 } 180 ay[idxs[nz-1]] = 0.; 181 } 182 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 183 } 184 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 191 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 192 { 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 202 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 203 { 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCBDDCBenignShellMat" 213 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 214 { 215 PC_IS *pcis = (PC_IS*)pc->data; 216 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 217 PCBDDCBenignMatMult_ctx ctx; 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 if (!restore) { 222 Mat A_IB,A_BI; 223 PetscScalar *work; 224 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 225 226 if (pcbddc->benign_original_mat) { 227 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 228 } 229 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 230 PetscFunctionReturn(0); 231 } 232 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 233 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 234 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 235 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 236 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 237 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 238 ierr = PetscNew(&ctx);CHKERRQ(ierr); 239 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 240 ctx->apply_left = PETSC_TRUE; 241 ctx->apply_right = PETSC_FALSE; 242 ctx->apply_p0 = PETSC_FALSE; 243 ctx->benign_n = pcbddc->benign_n; 244 if (reuse) { 245 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 246 ctx->free = PETSC_FALSE; 247 } else { /* TODO: could be optimized for successive solves */ 248 ISLocalToGlobalMapping N_to_D; 249 PetscInt i; 250 251 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 252 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 253 for (i=0;i<pcbddc->benign_n;i++) { 254 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 255 } 256 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 257 ctx->free = PETSC_TRUE; 258 } 259 ctx->A = pcis->A_IB; 260 ctx->work = work; 261 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 262 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 263 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 264 pcis->A_IB = A_IB; 265 266 /* A_BI as A_IB^T */ 267 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 268 pcbddc->benign_original_mat = pcis->A_BI; 269 pcis->A_BI = A_BI; 270 } else { 271 if (!pcbddc->benign_original_mat) { 272 PetscFunctionReturn(0); 273 } 274 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 275 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 276 pcis->A_IB = ctx->A; 277 ctx->A = NULL; 278 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 279 pcis->A_BI = pcbddc->benign_original_mat; 280 pcbddc->benign_original_mat = NULL; 281 if (ctx->free) { 282 PetscInt i; 283 for (i=0;i<ctx->benign_n;i++) { 284 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 285 } 286 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 287 } 288 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 289 ierr = PetscFree(ctx);CHKERRQ(ierr); 290 } 291 PetscFunctionReturn(0); 292 } 293 294 /* used just in bddc debug mode */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCBenignProject" 297 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 298 { 299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 300 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 301 Mat An; 302 PetscErrorCode ierr; 303 304 PetscFunctionBegin; 305 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 306 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 307 if (is1) { 308 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 309 ierr = MatDestroy(&An);CHKERRQ(ierr); 310 } else { 311 *B = An; 312 } 313 PetscFunctionReturn(0); 314 } 315 316 /* TODO: add reuse flag */ 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatSeqAIJCompress" 319 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 320 { 321 Mat Bt; 322 PetscScalar *a,*bdata; 323 const PetscInt *ii,*ij; 324 PetscInt m,n,i,nnz,*bii,*bij; 325 PetscBool flg_row; 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 330 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 331 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 332 nnz = n; 333 for (i=0;i<ii[n];i++) { 334 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 335 } 336 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 337 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 338 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 339 nnz = 0; 340 bii[0] = 0; 341 for (i=0;i<n;i++) { 342 PetscInt j; 343 for (j=ii[i];j<ii[i+1];j++) { 344 PetscScalar entry = a[j]; 345 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 346 bij[nnz] = ij[j]; 347 bdata[nnz] = entry; 348 nnz++; 349 } 350 } 351 bii[i+1] = nnz; 352 } 353 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 354 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 355 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 356 { 357 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 358 b->free_a = PETSC_TRUE; 359 b->free_ij = PETSC_TRUE; 360 } 361 *B = Bt; 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatDetectDisconnectedComponents" 367 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 368 { 369 Mat B; 370 IS is_dummy,*cc_n; 371 ISLocalToGlobalMapping l2gmap_dummy; 372 PCBDDCGraph graph; 373 PetscInt i,n; 374 PetscInt *xadj,*adjncy; 375 PetscInt *xadj_filtered,*adjncy_filtered; 376 PetscBool flg_row,isseqaij; 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 if (!A->rmap->N || !A->cmap->N) { 381 *ncc = 0; 382 *cc = NULL; 383 PetscFunctionReturn(0); 384 } 385 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 386 if (!isseqaij && filter) { 387 PetscBool isseqdense; 388 389 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 390 if (!isseqdense) { 391 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 392 } else { /* TODO: rectangular case and LDA */ 393 PetscScalar *array; 394 PetscReal chop=1.e-6; 395 396 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 398 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 399 for (i=0;i<n;i++) { 400 PetscInt j; 401 for (j=i+1;j<n;j++) { 402 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 403 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 404 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 405 } 406 } 407 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 408 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 409 } 410 } else { 411 B = A; 412 } 413 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 414 415 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 416 if (filter) { 417 PetscScalar *data; 418 PetscInt j,cum; 419 420 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 421 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 422 cum = 0; 423 for (i=0;i<n;i++) { 424 PetscInt t; 425 426 for (j=xadj[i];j<xadj[i+1];j++) { 427 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 428 continue; 429 } 430 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 431 } 432 t = xadj_filtered[i]; 433 xadj_filtered[i] = cum; 434 cum += t; 435 } 436 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 437 } else { 438 xadj_filtered = NULL; 439 adjncy_filtered = NULL; 440 } 441 442 /* compute local connected components using PCBDDCGraph */ 443 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 444 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 445 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 446 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 447 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 448 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 449 if (xadj_filtered) { 450 graph->xadj = xadj_filtered; 451 graph->adjncy = adjncy_filtered; 452 } else { 453 graph->xadj = xadj; 454 graph->adjncy = adjncy; 455 } 456 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 457 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 458 /* partial clean up */ 459 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 460 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 461 if (A != B) { 462 ierr = MatDestroy(&B);CHKERRQ(ierr); 463 } 464 465 /* get back data */ 466 if (ncc) *ncc = graph->ncc; 467 if (cc) { 468 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 469 for (i=0;i<graph->ncc;i++) { 470 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 471 } 472 *cc = cc_n; 473 } 474 /* clean up graph */ 475 graph->xadj = 0; 476 graph->adjncy = 0; 477 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCBenignCheck" 483 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 484 { 485 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 486 PC_IS* pcis = (PC_IS*)(pc->data); 487 IS dirIS = NULL; 488 PetscInt i; 489 PetscErrorCode ierr; 490 491 PetscFunctionBegin; 492 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 493 if (zerodiag) { 494 Mat A; 495 Vec vec3_N; 496 PetscScalar *vals; 497 const PetscInt *idxs; 498 PetscInt nz,*count; 499 500 /* p0 */ 501 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 502 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 503 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 504 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 505 for (i=0;i<nz;i++) vals[i] = 1.; 506 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 508 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 509 /* v_I */ 510 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 511 for (i=0;i<nz;i++) vals[i] = 0.; 512 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 514 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 515 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 516 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 if (dirIS) { 519 PetscInt n; 520 521 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 522 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 523 for (i=0;i<n;i++) vals[i] = 0.; 524 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 525 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 526 } 527 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 529 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 530 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 531 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 532 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 533 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 534 if (PetscAbsScalar(vals[0]) > 1.e-1) { 535 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 536 } 537 ierr = PetscFree(vals);CHKERRQ(ierr); 538 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 539 540 /* there should not be any pressure dofs lying on the interface */ 541 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 542 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 543 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 544 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 545 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 546 for (i=0;i<nz;i++) { 547 if (count[idxs[i]]) { 548 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 549 } 550 } 551 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 552 ierr = PetscFree(count);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 555 556 /* check PCBDDCBenignGetOrSetP0 */ 557 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 558 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 559 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 560 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 561 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 562 for (i=0;i<pcbddc->benign_n;i++) { 563 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 564 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 572 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 573 { 574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 575 IS pressures,zerodiag,*zerodiag_subs; 576 PetscInt nz,n; 577 PetscInt *interior_dofs,n_interior_dofs; 578 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 579 PetscErrorCode ierr; 580 581 PetscFunctionBegin; 582 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 584 for (n=0;n<pcbddc->benign_n;n++) { 585 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 586 } 587 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 588 pcbddc->benign_n = 0; 589 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 590 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 591 Checks if all the pressure dofs in each subdomain have a zero diagonal 592 If not, a change of basis on pressures is not needed 593 since the local Schur complements are SPD 594 */ 595 has_null_pressures = PETSC_TRUE; 596 have_null = PETSC_TRUE; 597 if (pcbddc->n_ISForDofsLocal) { 598 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 599 600 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 601 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 602 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 603 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 605 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 606 if (!sorted) { 607 ierr = ISSort(pressures);CHKERRQ(ierr); 608 } 609 } else { 610 pressures = NULL; 611 } 612 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 613 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 614 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 615 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 616 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 617 if (!sorted) { 618 ierr = ISSort(zerodiag);CHKERRQ(ierr); 619 } 620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 621 if (!nz) { 622 if (n) have_null = PETSC_FALSE; 623 has_null_pressures = PETSC_FALSE; 624 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 625 } 626 recompute_zerodiag = PETSC_FALSE; 627 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 628 zerodiag_subs = NULL; 629 pcbddc->benign_n = 0; 630 n_interior_dofs = 0; 631 interior_dofs = NULL; 632 if (pcbddc->current_level) { /* need to compute interior nodes */ 633 PetscInt n,i,j; 634 PetscInt n_neigh,*neigh,*n_shared,**shared; 635 PetscInt *iwork; 636 637 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 638 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 639 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 640 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 641 for (i=0;i<n_neigh;i++) 642 for (j=0;j<n_shared[i];j++) 643 iwork[shared[i][j]] += 1; 644 for (i=0;i<n;i++) 645 if (!iwork[i]) 646 interior_dofs[n_interior_dofs++] = i; 647 ierr = PetscFree(iwork);CHKERRQ(ierr); 648 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 649 } 650 if (has_null_pressures) { 651 IS *subs; 652 PetscInt nsubs,i,j,nl; 653 const PetscInt *idxs; 654 PetscScalar *array; 655 Vec *work; 656 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 657 658 subs = pcbddc->local_subs; 659 nsubs = pcbddc->n_local_subs; 660 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 661 if (pcbddc->current_level) { 662 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 663 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 664 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 665 /* work[0] = 1_p */ 666 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 667 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 668 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 669 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 670 /* work[0] = 1_v */ 671 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 672 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 673 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 674 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 675 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 676 } 677 if (nsubs > 1) { 678 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 679 for (i=0;i<nsubs;i++) { 680 ISLocalToGlobalMapping l2g; 681 IS t_zerodiag_subs; 682 PetscInt nl; 683 684 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 685 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 686 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 687 if (nl) { 688 PetscBool valid = PETSC_TRUE; 689 690 if (pcbddc->current_level) { 691 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 692 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 693 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 694 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 695 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 696 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 697 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 698 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 700 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 701 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 702 for (j=0;j<n_interior_dofs;j++) { 703 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 704 valid = PETSC_FALSE; 705 break; 706 } 707 } 708 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 709 } 710 if (valid && pcbddc->NeumannBoundariesLocal) { 711 IS t_bc; 712 PetscInt nzb; 713 714 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 715 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 716 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 717 if (nzb) valid = PETSC_FALSE; 718 } 719 if (valid && pressures) { 720 IS t_pressure_subs; 721 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 722 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 723 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 724 } 725 if (valid) { 726 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 727 pcbddc->benign_n++; 728 } else { 729 recompute_zerodiag = PETSC_TRUE; 730 } 731 } 732 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 733 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 734 } 735 } else { /* there's just one subdomain (or zero if they have not been detected */ 736 PetscBool valid = PETSC_TRUE; 737 738 if (pcbddc->NeumannBoundariesLocal) { 739 PetscInt nzb; 740 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 741 if (nzb) valid = PETSC_FALSE; 742 } 743 if (valid && pressures) { 744 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 745 } 746 if (valid && pcbddc->current_level) { 747 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 748 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 749 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 750 for (j=0;j<n_interior_dofs;j++) { 751 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 752 valid = PETSC_FALSE; 753 break; 754 } 755 } 756 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 757 } 758 if (valid) { 759 pcbddc->benign_n = 1; 760 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 761 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 762 zerodiag_subs[0] = zerodiag; 763 } 764 } 765 if (pcbddc->current_level) { 766 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 767 } 768 } 769 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 770 771 if (!pcbddc->benign_n) { 772 PetscInt n; 773 774 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 775 recompute_zerodiag = PETSC_FALSE; 776 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 777 if (n) { 778 has_null_pressures = PETSC_FALSE; 779 have_null = PETSC_FALSE; 780 } 781 } 782 783 /* final check for null pressures */ 784 if (zerodiag && pressures) { 785 PetscInt nz,np; 786 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 787 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 788 if (nz != np) have_null = PETSC_FALSE; 789 } 790 791 if (recompute_zerodiag) { 792 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 793 if (pcbddc->benign_n == 1) { 794 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 795 zerodiag = zerodiag_subs[0]; 796 } else { 797 PetscInt i,nzn,*new_idxs; 798 799 nzn = 0; 800 for (i=0;i<pcbddc->benign_n;i++) { 801 PetscInt ns; 802 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 803 nzn += ns; 804 } 805 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 806 nzn = 0; 807 for (i=0;i<pcbddc->benign_n;i++) { 808 PetscInt ns,*idxs; 809 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 810 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 811 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 812 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 813 nzn += ns; 814 } 815 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 817 } 818 have_null = PETSC_FALSE; 819 } 820 821 /* no-net-flux */ 822 if (pcbddc->benign_compute_nonetflux) { 823 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 824 MatNullSpace near_null_space; 825 ISLocalToGlobalMapping rmap; 826 Vec quad_vec; 827 PetscScalar *pvals; 828 PetscInt i,np,*dummyins; 829 IS isused = NULL; 830 PetscBool participate = PETSC_TRUE; 831 832 /* create vector to hold quadrature weights */ 833 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 834 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 835 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 836 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 837 838 /* compute B^{(i)T} * 1_p */ 839 np = 0; 840 if (pressures) { 841 isused = pressures; 842 } else { 843 isused = zerodiag; 844 } 845 if (isused) { 846 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 847 } 848 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 849 for (i=0;i<np;i++) pvals[i] = 1.; 850 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 851 if (np) { 852 const PetscInt *pidxs; 853 854 if (isused) { 855 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 856 } 857 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 858 if (isused) { 859 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 860 } 861 } 862 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 863 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 864 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 865 ierr = PetscFree(pvals);CHKERRQ(ierr); 866 if (!isused) participate = PETSC_FALSE; 867 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 868 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 869 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 870 for (i=0;i<matis->sf->nleaves;i++) 871 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 872 else matis->sf_leafdata[i] = -1; 873 874 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 875 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 876 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 877 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 878 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 879 for (i=0;i<matis->sf->nleaves;i++) { 880 if (PetscGlobalRank != matis->sf_leafdata[i]) { 881 pvals[i] = 0.; 882 } 883 } 884 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 885 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 886 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 887 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 888 ierr = PetscFree(dummyins);CHKERRQ(ierr); 889 890 /* assembly quadrature vec and attach near null space to pmat */ 891 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 892 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 893 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 894 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 895 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 896 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 897 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 898 } 899 900 /* change of basis and p0 dofs */ 901 if (has_null_pressures) { 902 IS zerodiagc; 903 const PetscInt *idxs,*idxsc; 904 PetscInt i,s,*nnz; 905 906 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 907 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 908 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 909 /* local change of basis for pressures */ 910 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 911 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 912 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 913 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 914 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 915 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 916 for (i=0;i<pcbddc->benign_n;i++) { 917 PetscInt nzs,j; 918 919 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 920 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 921 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 922 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 923 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 } 925 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 926 ierr = PetscFree(nnz);CHKERRQ(ierr); 927 /* set identity on velocities */ 928 for (i=0;i<n-nz;i++) { 929 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 930 } 931 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 932 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 933 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 934 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 935 /* set change on pressures */ 936 for (s=0;s<pcbddc->benign_n;s++) { 937 PetscScalar *array; 938 PetscInt nzs; 939 940 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 941 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 942 for (i=0;i<nzs-1;i++) { 943 PetscScalar vals[2]; 944 PetscInt cols[2]; 945 946 cols[0] = idxs[i]; 947 cols[1] = idxs[nzs-1]; 948 vals[0] = 1.; 949 vals[1] = 1.; 950 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 951 } 952 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 953 for (i=0;i<nzs-1;i++) array[i] = -1.; 954 array[nzs-1] = 1.; 955 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 956 /* store local idxs for p0 */ 957 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 958 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 959 ierr = PetscFree(array);CHKERRQ(ierr); 960 } 961 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 963 /* project if needed */ 964 if (pcbddc->benign_change_explicit) { 965 Mat M; 966 967 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 968 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 969 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 970 ierr = MatDestroy(&M);CHKERRQ(ierr); 971 } 972 /* store global idxs for p0 */ 973 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 974 } 975 pcbddc->benign_zerodiag_subs = zerodiag_subs; 976 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 977 978 /* determines if the coarse solver will be singular or not */ 979 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 980 /* determines if the problem has subdomains with 0 pressure block */ 981 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 982 *zerodiaglocal = zerodiag; 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 988 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 989 { 990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 991 PetscScalar *array; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 if (!pcbddc->benign_sf) { 996 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 997 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 998 } 999 if (get) { 1000 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1001 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1002 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1003 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 } else { 1005 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1006 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1007 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1015 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1016 { 1017 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 /* TODO: add error checking 1022 - avoid nested pop (or push) calls. 1023 - cannot push before pop. 1024 - cannot call this if pcbddc->local_mat is NULL 1025 */ 1026 if (!pcbddc->benign_n) { 1027 PetscFunctionReturn(0); 1028 } 1029 if (pop) { 1030 if (pcbddc->benign_change_explicit) { 1031 IS is_p0; 1032 MatReuse reuse; 1033 1034 /* extract B_0 */ 1035 reuse = MAT_INITIAL_MATRIX; 1036 if (pcbddc->benign_B0) { 1037 reuse = MAT_REUSE_MATRIX; 1038 } 1039 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1041 /* remove rows and cols from local problem */ 1042 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1043 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1044 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1045 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1046 } else { 1047 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1048 PetscScalar *vals; 1049 PetscInt i,n,*idxs_ins; 1050 1051 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1052 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1053 if (!pcbddc->benign_B0) { 1054 PetscInt *nnz; 1055 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1056 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1057 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1058 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1059 for (i=0;i<pcbddc->benign_n;i++) { 1060 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1061 nnz[i] = n - nnz[i]; 1062 } 1063 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1064 ierr = PetscFree(nnz);CHKERRQ(ierr); 1065 } 1066 1067 for (i=0;i<pcbddc->benign_n;i++) { 1068 PetscScalar *array; 1069 PetscInt *idxs,j,nz,cum; 1070 1071 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1072 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1073 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1074 for (j=0;j<nz;j++) vals[j] = 1.; 1075 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1076 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1077 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1078 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1079 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1080 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1081 cum = 0; 1082 for (j=0;j<n;j++) { 1083 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1084 vals[cum] = array[j]; 1085 idxs_ins[cum] = j; 1086 cum++; 1087 } 1088 } 1089 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1091 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1092 } 1093 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1095 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1096 } 1097 } else { /* push */ 1098 if (pcbddc->benign_change_explicit) { 1099 PetscInt i; 1100 1101 for (i=0;i<pcbddc->benign_n;i++) { 1102 PetscScalar *B0_vals; 1103 PetscInt *B0_cols,B0_ncol; 1104 1105 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1106 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1107 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1108 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1109 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1110 } 1111 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1112 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 } else { 1114 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1115 } 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1122 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1123 { 1124 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1125 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1126 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1127 PetscBLASInt *B_iwork,*B_ifail; 1128 PetscScalar *work,lwork; 1129 PetscScalar *St,*S,*eigv; 1130 PetscScalar *Sarray,*Starray; 1131 PetscReal *eigs,thresh; 1132 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1133 PetscBool allocated_S_St; 1134 #if defined(PETSC_USE_COMPLEX) 1135 PetscReal *rwork; 1136 #endif 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1141 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1142 1143 if (pcbddc->dbg_flag) { 1144 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1147 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1148 } 1149 1150 if (pcbddc->dbg_flag) { 1151 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1152 } 1153 1154 /* max size of subsets */ 1155 mss = 0; 1156 for (i=0;i<sub_schurs->n_subs;i++) { 1157 PetscInt subset_size; 1158 1159 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1160 mss = PetscMax(mss,subset_size); 1161 } 1162 1163 /* min/max and threshold */ 1164 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1165 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1166 nmax = PetscMax(nmin,nmax); 1167 allocated_S_St = PETSC_FALSE; 1168 if (nmin) { 1169 allocated_S_St = PETSC_TRUE; 1170 } 1171 1172 /* allocate lapack workspace */ 1173 cum = cum2 = 0; 1174 maxneigs = 0; 1175 for (i=0;i<sub_schurs->n_subs;i++) { 1176 PetscInt n,subset_size; 1177 1178 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1179 n = PetscMin(subset_size,nmax); 1180 cum += subset_size; 1181 cum2 += subset_size*n; 1182 maxneigs = PetscMax(maxneigs,n); 1183 } 1184 if (mss) { 1185 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1186 PetscBLASInt B_itype = 1; 1187 PetscBLASInt B_N = mss; 1188 PetscReal zero = 0.0; 1189 PetscReal eps = 0.0; /* dlamch? */ 1190 1191 B_lwork = -1; 1192 S = NULL; 1193 St = NULL; 1194 eigs = NULL; 1195 eigv = NULL; 1196 B_iwork = NULL; 1197 B_ifail = NULL; 1198 #if defined(PETSC_USE_COMPLEX) 1199 rwork = NULL; 1200 #endif 1201 thresh = 1.0; 1202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1203 #if defined(PETSC_USE_COMPLEX) 1204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1205 #else 1206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1207 #endif 1208 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1210 } else { 1211 /* TODO */ 1212 } 1213 } else { 1214 lwork = 0; 1215 } 1216 1217 nv = 0; 1218 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1219 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1220 } 1221 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1222 if (allocated_S_St) { 1223 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1224 } 1225 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1226 #if defined(PETSC_USE_COMPLEX) 1227 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1230 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1231 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1232 nv+cum,&pcbddc->adaptive_constraints_idxs, 1233 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1234 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1235 1236 maxneigs = 0; 1237 cum = cumarray = 0; 1238 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1239 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1240 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1241 const PetscInt *idxs; 1242 1243 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1244 for (cum=0;cum<nv;cum++) { 1245 pcbddc->adaptive_constraints_n[cum] = 1; 1246 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1247 pcbddc->adaptive_constraints_data[cum] = 1.0; 1248 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1249 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1250 } 1251 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1252 } 1253 1254 if (mss) { /* multilevel */ 1255 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1256 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1257 } 1258 1259 thresh = pcbddc->adaptive_threshold; 1260 for (i=0;i<sub_schurs->n_subs;i++) { 1261 const PetscInt *idxs; 1262 PetscReal upper,lower; 1263 PetscInt j,subset_size,eigs_start = 0; 1264 PetscBLASInt B_N; 1265 PetscBool same_data = PETSC_FALSE; 1266 1267 if (pcbddc->use_deluxe_scaling) { 1268 upper = PETSC_MAX_REAL; 1269 lower = thresh; 1270 } else { 1271 upper = 1./thresh; 1272 lower = 0.; 1273 } 1274 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1275 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1276 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1277 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1278 if (sub_schurs->is_hermitian) { 1279 PetscInt j,k; 1280 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1281 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1282 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1283 } 1284 for (j=0;j<subset_size;j++) { 1285 for (k=j;k<subset_size;k++) { 1286 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1287 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1288 } 1289 } 1290 } else { 1291 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1292 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1293 } 1294 } else { 1295 S = Sarray + cumarray; 1296 St = Starray + cumarray; 1297 } 1298 /* see if we can save some work */ 1299 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1300 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1301 } 1302 1303 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1304 B_neigs = 0; 1305 } else { 1306 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1307 PetscBLASInt B_itype = 1; 1308 PetscBLASInt B_IL, B_IU; 1309 PetscReal eps = -1.0; /* dlamch? */ 1310 PetscInt nmin_s; 1311 PetscBool compute_range = PETSC_FALSE; 1312 1313 if (pcbddc->dbg_flag) { 1314 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1315 } 1316 1317 compute_range = PETSC_FALSE; 1318 if (thresh > 1.+PETSC_SMALL && !same_data) { 1319 compute_range = PETSC_TRUE; 1320 } 1321 1322 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1323 if (compute_range) { 1324 1325 /* ask for eigenvalues larger than thresh */ 1326 #if defined(PETSC_USE_COMPLEX) 1327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1328 #else 1329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1330 #endif 1331 } else if (!same_data) { 1332 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1333 B_IL = 1; 1334 #if defined(PETSC_USE_COMPLEX) 1335 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1336 #else 1337 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1338 #endif 1339 } else { /* same_data is true, so get the adaptive function requested by the user */ 1340 PetscInt k; 1341 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1342 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1343 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1344 nmin = nmax; 1345 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1346 for (k=0;k<nmax;k++) { 1347 eigs[k] = 1./PETSC_SMALL; 1348 eigv[k*(subset_size+1)] = 1.0; 1349 } 1350 } 1351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1352 if (B_ierr) { 1353 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1354 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1355 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1356 } 1357 1358 if (B_neigs > nmax) { 1359 if (pcbddc->dbg_flag) { 1360 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1361 } 1362 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1363 B_neigs = nmax; 1364 } 1365 1366 nmin_s = PetscMin(nmin,B_N); 1367 if (B_neigs < nmin_s) { 1368 PetscBLASInt B_neigs2; 1369 1370 if (pcbddc->use_deluxe_scaling) { 1371 B_IL = B_N - nmin_s + 1; 1372 B_IU = B_N - B_neigs; 1373 } else { 1374 B_IL = B_neigs + 1; 1375 B_IU = nmin_s; 1376 } 1377 if (pcbddc->dbg_flag) { 1378 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1379 } 1380 if (sub_schurs->is_hermitian) { 1381 PetscInt j; 1382 for (j=0;j<subset_size;j++) { 1383 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1384 } 1385 for (j=0;j<subset_size;j++) { 1386 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1387 } 1388 } else { 1389 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1390 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 } 1392 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1393 #if defined(PETSC_USE_COMPLEX) 1394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1395 #else 1396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1397 #endif 1398 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1399 B_neigs += B_neigs2; 1400 } 1401 if (B_ierr) { 1402 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1403 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1404 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1405 } 1406 if (pcbddc->dbg_flag) { 1407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1408 for (j=0;j<B_neigs;j++) { 1409 if (eigs[j] == 0.0) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1411 } else { 1412 if (pcbddc->use_deluxe_scaling) { 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1416 } 1417 } 1418 } 1419 } 1420 } else { 1421 /* TODO */ 1422 } 1423 } 1424 /* change the basis back to the original one */ 1425 if (sub_schurs->change) { 1426 Mat change,phi,phit; 1427 1428 if (pcbddc->dbg_flag > 1) { 1429 PetscInt ii; 1430 for (ii=0;ii<B_neigs;ii++) { 1431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1432 for (j=0;j<B_N;j++) { 1433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1434 } 1435 } 1436 } 1437 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1438 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1439 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1440 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1441 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1442 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1443 } 1444 maxneigs = PetscMax(B_neigs,maxneigs); 1445 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1446 if (B_neigs) { 1447 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1448 1449 if (pcbddc->dbg_flag > 1) { 1450 PetscInt ii; 1451 for (ii=0;ii<B_neigs;ii++) { 1452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1453 for (j=0;j<B_N;j++) { 1454 #if defined(PETSC_USE_COMPLEX) 1455 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1456 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1458 #else 1459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1460 #endif 1461 } 1462 } 1463 } 1464 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1465 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1466 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1467 cum++; 1468 } 1469 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1470 /* shift for next computation */ 1471 cumarray += subset_size*subset_size; 1472 } 1473 if (pcbddc->dbg_flag) { 1474 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1475 } 1476 1477 if (mss) { 1478 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1479 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1480 /* destroy matrices (junk) */ 1481 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1482 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1483 } 1484 if (allocated_S_St) { 1485 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1486 } 1487 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1488 #if defined(PETSC_USE_COMPLEX) 1489 ierr = PetscFree(rwork);CHKERRQ(ierr); 1490 #endif 1491 if (pcbddc->dbg_flag) { 1492 PetscInt maxneigs_r; 1493 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "PCBDDCSetUpSolvers" 1501 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1502 { 1503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1504 PetscScalar *coarse_submat_vals; 1505 PetscErrorCode ierr; 1506 1507 PetscFunctionBegin; 1508 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1509 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1510 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1511 1512 /* Setup local neumann solver ksp_R */ 1513 /* PCBDDCSetUpLocalScatters should be called first! */ 1514 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1515 1516 /* Change global null space passed in by the user if change of basis has been requested */ 1517 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1518 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1519 } 1520 1521 /* 1522 Setup local correction and local part of coarse basis. 1523 Gives back the dense local part of the coarse matrix in column major ordering 1524 */ 1525 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1526 1527 /* Compute total number of coarse nodes and setup coarse solver */ 1528 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1529 1530 /* free */ 1531 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "PCBDDCResetCustomization" 1537 PetscErrorCode PCBDDCResetCustomization(PC pc) 1538 { 1539 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1544 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1545 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1546 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1547 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1548 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1549 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1550 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1551 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1552 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1553 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1554 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "PCBDDCResetTopography" 1560 PetscErrorCode PCBDDCResetTopography(PC pc) 1561 { 1562 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1563 PetscInt i; 1564 PetscErrorCode ierr; 1565 1566 PetscFunctionBegin; 1567 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1568 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1569 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1570 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1571 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1572 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1573 for (i=0;i<pcbddc->n_local_subs;i++) { 1574 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1575 } 1576 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1577 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "PCBDDCResetSolvers" 1583 PetscErrorCode PCBDDCResetSolvers(PC pc) 1584 { 1585 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1590 if (pcbddc->coarse_phi_B) { 1591 PetscScalar *array; 1592 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1593 ierr = PetscFree(array);CHKERRQ(ierr); 1594 } 1595 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1596 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1597 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1598 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1599 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1600 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1601 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1602 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1603 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1604 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1605 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1606 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1607 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1608 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1609 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1610 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1611 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1612 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1613 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1614 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1615 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1616 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1617 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1618 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1619 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1620 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1621 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1622 if (pcbddc->benign_zerodiag_subs) { 1623 PetscInt i; 1624 for (i=0;i<pcbddc->benign_n;i++) { 1625 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1626 } 1627 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1628 } 1629 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1635 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1636 { 1637 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1638 PC_IS *pcis = (PC_IS*)pc->data; 1639 VecType impVecType; 1640 PetscInt n_constraints,n_R,old_size; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1645 /* get sizes */ 1646 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1647 n_R = pcis->n - pcbddc->n_vertices; 1648 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1649 /* local work vectors (try to avoid unneeded work)*/ 1650 /* R nodes */ 1651 old_size = -1; 1652 if (pcbddc->vec1_R) { 1653 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1654 } 1655 if (n_R != old_size) { 1656 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1657 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1658 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1659 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1660 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1661 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1662 } 1663 /* local primal dofs */ 1664 old_size = -1; 1665 if (pcbddc->vec1_P) { 1666 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1667 } 1668 if (pcbddc->local_primal_size != old_size) { 1669 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1670 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1671 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1672 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1673 } 1674 /* local explicit constraints */ 1675 old_size = -1; 1676 if (pcbddc->vec1_C) { 1677 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1678 } 1679 if (n_constraints && n_constraints != old_size) { 1680 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1681 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1682 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1683 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "PCBDDCSetUpCorrection" 1690 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1691 { 1692 PetscErrorCode ierr; 1693 /* pointers to pcis and pcbddc */ 1694 PC_IS* pcis = (PC_IS*)pc->data; 1695 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1696 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1697 /* submatrices of local problem */ 1698 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1699 /* submatrices of local coarse problem */ 1700 Mat S_VV,S_CV,S_VC,S_CC; 1701 /* working matrices */ 1702 Mat C_CR; 1703 /* additional working stuff */ 1704 PC pc_R; 1705 Mat F; 1706 Vec dummy_vec; 1707 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1708 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1709 PetscScalar *work; 1710 PetscInt *idx_V_B; 1711 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1712 PetscInt i,n_R,n_D,n_B; 1713 1714 /* some shortcuts to scalars */ 1715 PetscScalar one=1.0,m_one=-1.0; 1716 1717 PetscFunctionBegin; 1718 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1719 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1720 } 1721 1722 /* Set Non-overlapping dimensions */ 1723 n_vertices = pcbddc->n_vertices; 1724 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1725 n_B = pcis->n_B; 1726 n_D = pcis->n - n_B; 1727 n_R = pcis->n - n_vertices; 1728 1729 /* vertices in boundary numbering */ 1730 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1731 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1732 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1733 1734 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1735 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1737 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1739 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1740 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1741 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1743 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1744 1745 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1746 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1747 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1748 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1749 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1750 lda_rhs = n_R; 1751 need_benign_correction = PETSC_FALSE; 1752 if (isLU || isILU || isCHOL) { 1753 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1754 } else if (sub_schurs->reuse_solver) { 1755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1756 MatFactorType type; 1757 1758 F = reuse_solver->F; 1759 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1760 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1761 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1762 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1763 } else { 1764 F = NULL; 1765 } 1766 1767 /* allocate workspace */ 1768 n = 0; 1769 if (n_constraints) { 1770 n += lda_rhs*n_constraints; 1771 } 1772 if (n_vertices) { 1773 n = PetscMax(2*lda_rhs*n_vertices,n); 1774 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1775 } 1776 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1777 1778 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1779 dummy_vec = NULL; 1780 if (need_benign_correction && lda_rhs != n_R && F) { 1781 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1782 } 1783 1784 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1785 if (n_constraints) { 1786 Mat M1,M2,M3,C_B; 1787 IS is_aux; 1788 PetscScalar *array,*array2; 1789 1790 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1791 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1792 1793 /* Extract constraints on R nodes: C_{CR} */ 1794 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1795 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1796 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1797 1798 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1799 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1800 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1801 for (i=0;i<n_constraints;i++) { 1802 const PetscScalar *row_cmat_values; 1803 const PetscInt *row_cmat_indices; 1804 PetscInt size_of_constraint,j; 1805 1806 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1807 for (j=0;j<size_of_constraint;j++) { 1808 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1809 } 1810 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1811 } 1812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1813 if (F) { 1814 Mat B; 1815 1816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1817 if (need_benign_correction) { 1818 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1819 1820 /* rhs is already zero on interior dofs, no need to change the rhs */ 1821 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1822 } 1823 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1824 if (need_benign_correction) { 1825 PetscScalar *marr; 1826 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1827 1828 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1829 if (lda_rhs != n_R) { 1830 for (i=0;i<n_constraints;i++) { 1831 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1832 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1833 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1834 } 1835 } else { 1836 for (i=0;i<n_constraints;i++) { 1837 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1838 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1839 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1840 } 1841 } 1842 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1843 } 1844 ierr = MatDestroy(&B);CHKERRQ(ierr); 1845 } else { 1846 PetscScalar *marr; 1847 1848 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1849 for (i=0;i<n_constraints;i++) { 1850 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1851 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1852 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1853 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1854 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1855 } 1856 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1857 } 1858 if (!pcbddc->switch_static) { 1859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1860 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1861 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1862 for (i=0;i<n_constraints;i++) { 1863 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1864 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1865 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1868 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1869 } 1870 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1871 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1872 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1873 } else { 1874 if (lda_rhs != n_R) { 1875 IS dummy; 1876 1877 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1878 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1879 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1880 } else { 1881 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1882 pcbddc->local_auxmat2 = local_auxmat2_R; 1883 } 1884 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1885 } 1886 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1887 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1888 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1889 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1890 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1891 if (isCHOL) { 1892 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1893 } else { 1894 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1895 } 1896 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1897 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1898 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1900 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1901 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1902 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1903 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1904 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1905 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1906 } 1907 1908 /* Get submatrices from subdomain matrix */ 1909 if (n_vertices) { 1910 IS is_aux; 1911 1912 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1913 IS tis; 1914 1915 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1916 ierr = ISSort(tis);CHKERRQ(ierr); 1917 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1918 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1919 } else { 1920 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1921 } 1922 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1923 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1924 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1925 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1926 } 1927 1928 /* Matrix of coarse basis functions (local) */ 1929 if (pcbddc->coarse_phi_B) { 1930 PetscInt on_B,on_primal,on_D=n_D; 1931 if (pcbddc->coarse_phi_D) { 1932 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1933 } 1934 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1935 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1936 PetscScalar *marray; 1937 1938 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1939 ierr = PetscFree(marray);CHKERRQ(ierr); 1940 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1941 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1943 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1944 } 1945 } 1946 1947 if (!pcbddc->coarse_phi_B) { 1948 PetscScalar *marray; 1949 1950 n = n_B*pcbddc->local_primal_size; 1951 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1952 n += n_D*pcbddc->local_primal_size; 1953 } 1954 if (!pcbddc->symmetric_primal) { 1955 n *= 2; 1956 } 1957 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1958 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1959 n = n_B*pcbddc->local_primal_size; 1960 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1962 n += n_D*pcbddc->local_primal_size; 1963 } 1964 if (!pcbddc->symmetric_primal) { 1965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1966 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1967 n = n_B*pcbddc->local_primal_size; 1968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1969 } 1970 } else { 1971 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1972 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1973 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1974 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1975 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1976 } 1977 } 1978 } 1979 1980 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1981 p0_lidx_I = NULL; 1982 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1983 const PetscInt *idxs; 1984 1985 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1986 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1987 for (i=0;i<pcbddc->benign_n;i++) { 1988 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1989 } 1990 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1991 } 1992 1993 /* vertices */ 1994 if (n_vertices) { 1995 1996 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1997 1998 if (n_R) { 1999 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2000 PetscBLASInt B_N,B_one = 1; 2001 PetscScalar *x,*y; 2002 PetscBool isseqaij; 2003 2004 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2005 if (need_benign_correction) { 2006 ISLocalToGlobalMapping RtoN; 2007 IS is_p0; 2008 PetscInt *idxs_p0,n; 2009 2010 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2011 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2012 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2013 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2014 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2015 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2016 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2017 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2018 } 2019 2020 if (lda_rhs == n_R) { 2021 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2022 } else { 2023 PetscScalar *av,*array; 2024 const PetscInt *xadj,*adjncy; 2025 PetscInt n; 2026 PetscBool flg_row; 2027 2028 array = work+lda_rhs*n_vertices; 2029 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2030 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2031 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2032 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2033 for (i=0;i<n;i++) { 2034 PetscInt j; 2035 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2036 } 2037 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2038 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2039 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2040 } 2041 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2042 if (need_benign_correction) { 2043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2044 PetscScalar *marr; 2045 2046 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2047 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2048 2049 | 0 0 0 | (V) 2050 L = | 0 0 -1 | (P-p0) 2051 | 0 0 -1 | (p0) 2052 2053 */ 2054 for (i=0;i<reuse_solver->benign_n;i++) { 2055 const PetscScalar *vals; 2056 const PetscInt *idxs,*idxs_zero; 2057 PetscInt n,j,nz; 2058 2059 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2060 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2061 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2062 for (j=0;j<n;j++) { 2063 PetscScalar val = vals[j]; 2064 PetscInt k,col = idxs[j]; 2065 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2066 } 2067 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2068 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2069 } 2070 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2071 } 2072 if (F) { 2073 /* need to correct the rhs */ 2074 if (need_benign_correction) { 2075 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2076 PetscScalar *marr; 2077 2078 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2079 if (lda_rhs != n_R) { 2080 for (i=0;i<n_vertices;i++) { 2081 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2082 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2083 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2084 } 2085 } else { 2086 for (i=0;i<n_vertices;i++) { 2087 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2088 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2089 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2090 } 2091 } 2092 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2093 } 2094 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2095 /* need to correct the solution */ 2096 if (need_benign_correction) { 2097 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2098 PetscScalar *marr; 2099 2100 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2101 if (lda_rhs != n_R) { 2102 for (i=0;i<n_vertices;i++) { 2103 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2104 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2105 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2106 } 2107 } else { 2108 for (i=0;i<n_vertices;i++) { 2109 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2110 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2111 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2112 } 2113 } 2114 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2115 } 2116 } else { 2117 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2118 for (i=0;i<n_vertices;i++) { 2119 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2120 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2121 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2122 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2123 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2124 } 2125 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2126 } 2127 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2128 /* S_VV and S_CV */ 2129 if (n_constraints) { 2130 Mat B; 2131 2132 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2133 for (i=0;i<n_vertices;i++) { 2134 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2135 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2136 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2137 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2138 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2139 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2140 } 2141 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2142 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2143 ierr = MatDestroy(&B);CHKERRQ(ierr); 2144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2145 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2146 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2148 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2149 ierr = MatDestroy(&B);CHKERRQ(ierr); 2150 } 2151 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2152 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2153 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2154 } 2155 if (lda_rhs != n_R) { 2156 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2157 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2158 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2159 } 2160 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2161 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2162 if (need_benign_correction) { 2163 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2164 PetscScalar *marr,*sums; 2165 2166 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2167 ierr = MatDenseGetArray(S_VVt,&marr); 2168 for (i=0;i<reuse_solver->benign_n;i++) { 2169 const PetscScalar *vals; 2170 const PetscInt *idxs,*idxs_zero; 2171 PetscInt n,j,nz; 2172 2173 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2174 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2175 for (j=0;j<n_vertices;j++) { 2176 PetscInt k; 2177 sums[j] = 0.; 2178 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2179 } 2180 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2181 for (j=0;j<n;j++) { 2182 PetscScalar val = vals[j]; 2183 PetscInt k; 2184 for (k=0;k<n_vertices;k++) { 2185 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2186 } 2187 } 2188 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2189 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2190 } 2191 ierr = PetscFree(sums);CHKERRQ(ierr); 2192 ierr = MatDenseRestoreArray(S_VVt,&marr); 2193 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2194 } 2195 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2196 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2197 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2198 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2199 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2200 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2201 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2202 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2203 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2204 } else { 2205 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2206 } 2207 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2208 2209 /* coarse basis functions */ 2210 for (i=0;i<n_vertices;i++) { 2211 PetscScalar *y; 2212 2213 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2214 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2215 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2216 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2217 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2218 y[n_B*i+idx_V_B[i]] = 1.0; 2219 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2220 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2221 2222 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2223 PetscInt j; 2224 2225 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2226 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2227 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2228 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2229 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2230 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2231 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2232 } 2233 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2234 } 2235 /* if n_R == 0 the object is not destroyed */ 2236 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2237 } 2238 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2239 2240 if (n_constraints) { 2241 Mat B; 2242 2243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2244 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2245 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2246 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2247 if (n_vertices) { 2248 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2249 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2250 } else { 2251 Mat S_VCt; 2252 2253 if (lda_rhs != n_R) { 2254 ierr = MatDestroy(&B);CHKERRQ(ierr); 2255 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2256 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2257 } 2258 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2259 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2260 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2261 } 2262 } 2263 ierr = MatDestroy(&B);CHKERRQ(ierr); 2264 /* coarse basis functions */ 2265 for (i=0;i<n_constraints;i++) { 2266 PetscScalar *y; 2267 2268 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2269 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2270 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2271 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2273 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2274 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2275 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2276 PetscInt j; 2277 2278 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2279 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2280 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2281 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2282 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2283 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2284 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2285 } 2286 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2287 } 2288 } 2289 if (n_constraints) { 2290 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2291 } 2292 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2293 2294 /* coarse matrix entries relative to B_0 */ 2295 if (pcbddc->benign_n) { 2296 Mat B0_B,B0_BPHI; 2297 IS is_dummy; 2298 PetscScalar *data; 2299 PetscInt j; 2300 2301 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2302 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2303 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2304 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2305 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2306 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2307 for (j=0;j<pcbddc->benign_n;j++) { 2308 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2309 for (i=0;i<pcbddc->local_primal_size;i++) { 2310 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2311 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2312 } 2313 } 2314 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2315 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2316 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2317 } 2318 2319 /* compute other basis functions for non-symmetric problems */ 2320 if (!pcbddc->symmetric_primal) { 2321 Mat B_V=NULL,B_C=NULL; 2322 PetscScalar *marray; 2323 2324 if (n_constraints) { 2325 Mat S_CCT,C_CRT; 2326 2327 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2328 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2329 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2330 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2331 if (n_vertices) { 2332 Mat S_VCT; 2333 2334 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2335 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2336 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2337 } 2338 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2339 } 2340 if (n_vertices && n_R) { 2341 PetscScalar *av,*marray; 2342 const PetscInt *xadj,*adjncy; 2343 PetscInt n; 2344 PetscBool flg_row; 2345 2346 /* B_V = B_V - A_VR^T */ 2347 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2348 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2349 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2350 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2351 for (i=0;i<n;i++) { 2352 PetscInt j; 2353 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2354 } 2355 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2356 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2357 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2358 } 2359 2360 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2361 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2362 for (i=0;i<n_vertices;i++) { 2363 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2364 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2365 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2366 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2367 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2368 } 2369 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2370 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2371 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2372 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2373 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2374 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2375 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2376 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2377 } 2378 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2379 /* coarse basis functions */ 2380 for (i=0;i<pcbddc->local_primal_size;i++) { 2381 PetscScalar *y; 2382 2383 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2384 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2385 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2386 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2387 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2388 if (i<n_vertices) { 2389 y[n_B*i+idx_V_B[i]] = 1.0; 2390 } 2391 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2392 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2393 2394 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2395 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2396 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2397 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2398 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2399 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2400 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2401 } 2402 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2403 } 2404 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2405 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2406 } 2407 /* free memory */ 2408 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2409 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2410 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2411 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2412 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2413 ierr = PetscFree(work);CHKERRQ(ierr); 2414 if (n_vertices) { 2415 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2416 } 2417 if (n_constraints) { 2418 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2419 } 2420 /* Checking coarse_sub_mat and coarse basis functios */ 2421 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2422 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2423 if (pcbddc->dbg_flag) { 2424 Mat coarse_sub_mat; 2425 Mat AUXMAT,TM1,TM2,TM3,TM4; 2426 Mat coarse_phi_D,coarse_phi_B; 2427 Mat coarse_psi_D,coarse_psi_B; 2428 Mat A_II,A_BB,A_IB,A_BI; 2429 Mat C_B,CPHI; 2430 IS is_dummy; 2431 Vec mones; 2432 MatType checkmattype=MATSEQAIJ; 2433 PetscReal real_value; 2434 2435 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2436 Mat A; 2437 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2438 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2439 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2440 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2441 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2442 ierr = MatDestroy(&A);CHKERRQ(ierr); 2443 } else { 2444 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2445 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2446 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2447 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2448 } 2449 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2450 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2451 if (!pcbddc->symmetric_primal) { 2452 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2453 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2454 } 2455 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2456 2457 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2459 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2460 if (!pcbddc->symmetric_primal) { 2461 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2462 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2463 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2464 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2465 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2466 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2467 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2468 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2469 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2470 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2471 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2472 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2473 } else { 2474 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2475 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2476 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2477 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2478 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2479 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2480 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2481 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2482 } 2483 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2484 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2485 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2486 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2487 if (pcbddc->benign_n) { 2488 Mat B0_B,B0_BPHI; 2489 PetscScalar *data,*data2; 2490 PetscInt j; 2491 2492 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2493 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2494 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2495 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2496 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2497 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2498 for (j=0;j<pcbddc->benign_n;j++) { 2499 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2500 for (i=0;i<pcbddc->local_primal_size;i++) { 2501 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2502 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2503 } 2504 } 2505 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2506 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2507 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2508 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2509 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2510 } 2511 #if 0 2512 { 2513 PetscViewer viewer; 2514 char filename[256]; 2515 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2516 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2517 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2518 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2519 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2520 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2521 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2522 if (save_change) { 2523 Mat phi_B; 2524 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2525 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2526 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2527 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2528 } else { 2529 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2530 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2531 } 2532 if (pcbddc->coarse_phi_D) { 2533 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2534 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2535 } 2536 if (pcbddc->coarse_psi_B) { 2537 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2538 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2539 } 2540 if (pcbddc->coarse_psi_D) { 2541 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2542 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2543 } 2544 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2545 } 2546 #endif 2547 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2548 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2549 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2551 2552 /* check constraints */ 2553 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2554 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2555 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2556 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2557 } else { 2558 PetscScalar *data; 2559 Mat tmat; 2560 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2562 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2563 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2564 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2565 } 2566 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2567 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2568 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2569 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2571 if (!pcbddc->symmetric_primal) { 2572 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2573 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2574 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2575 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2577 } 2578 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2579 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2580 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2581 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2583 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2584 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2585 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2586 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2587 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2588 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2589 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2590 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2591 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2592 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2593 if (!pcbddc->symmetric_primal) { 2594 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2595 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2596 } 2597 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2598 } 2599 /* get back data */ 2600 *coarse_submat_vals_n = coarse_submat_vals; 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2606 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2607 { 2608 Mat *work_mat; 2609 IS isrow_s,iscol_s; 2610 PetscBool rsorted,csorted; 2611 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2612 PetscErrorCode ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2616 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2617 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2618 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2619 2620 if (!rsorted) { 2621 const PetscInt *idxs; 2622 PetscInt *idxs_sorted,i; 2623 2624 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2625 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2626 for (i=0;i<rsize;i++) { 2627 idxs_perm_r[i] = i; 2628 } 2629 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2630 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2631 for (i=0;i<rsize;i++) { 2632 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2633 } 2634 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2635 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2636 } else { 2637 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2638 isrow_s = isrow; 2639 } 2640 2641 if (!csorted) { 2642 if (isrow == iscol) { 2643 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2644 iscol_s = isrow_s; 2645 } else { 2646 const PetscInt *idxs; 2647 PetscInt *idxs_sorted,i; 2648 2649 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2650 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2651 for (i=0;i<csize;i++) { 2652 idxs_perm_c[i] = i; 2653 } 2654 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2655 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2656 for (i=0;i<csize;i++) { 2657 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2658 } 2659 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2660 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2661 } 2662 } else { 2663 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2664 iscol_s = iscol; 2665 } 2666 2667 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2668 2669 if (!rsorted || !csorted) { 2670 Mat new_mat; 2671 IS is_perm_r,is_perm_c; 2672 2673 if (!rsorted) { 2674 PetscInt *idxs_r,i; 2675 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2676 for (i=0;i<rsize;i++) { 2677 idxs_r[idxs_perm_r[i]] = i; 2678 } 2679 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2680 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2681 } else { 2682 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2683 } 2684 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2685 2686 if (!csorted) { 2687 if (isrow_s == iscol_s) { 2688 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2689 is_perm_c = is_perm_r; 2690 } else { 2691 PetscInt *idxs_c,i; 2692 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2693 for (i=0;i<csize;i++) { 2694 idxs_c[idxs_perm_c[i]] = i; 2695 } 2696 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2697 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2698 } 2699 } else { 2700 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2701 } 2702 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2703 2704 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2705 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2706 work_mat[0] = new_mat; 2707 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2708 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2712 *B = work_mat[0]; 2713 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2714 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2715 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #undef __FUNCT__ 2720 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2721 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2722 { 2723 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2724 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2725 Mat new_mat; 2726 IS is_local,is_global; 2727 PetscInt local_size; 2728 PetscBool isseqaij; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2733 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2734 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2735 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2737 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2738 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2739 2740 /* check */ 2741 if (pcbddc->dbg_flag) { 2742 Vec x,x_change; 2743 PetscReal error; 2744 2745 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2746 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2747 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2748 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2749 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2750 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2751 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2752 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2753 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2754 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2755 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2757 ierr = VecDestroy(&x);CHKERRQ(ierr); 2758 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2759 } 2760 2761 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2762 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2763 if (isseqaij) { 2764 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2765 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2766 } else { 2767 Mat work_mat; 2768 2769 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2770 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2771 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2772 } 2773 if (matis->A->symmetric_set) { 2774 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2775 #if !defined(PETSC_USE_COMPLEX) 2776 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2777 #endif 2778 } 2779 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2785 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2786 { 2787 PC_IS* pcis = (PC_IS*)(pc->data); 2788 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2789 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2790 PetscInt *idx_R_local=NULL; 2791 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2792 PetscInt vbs,bs; 2793 PetscBT bitmask=NULL; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 /* 2798 No need to setup local scatters if 2799 - primal space is unchanged 2800 AND 2801 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2802 AND 2803 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2804 */ 2805 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2806 PetscFunctionReturn(0); 2807 } 2808 /* destroy old objects */ 2809 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2810 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2811 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2812 /* Set Non-overlapping dimensions */ 2813 n_B = pcis->n_B; 2814 n_D = pcis->n - n_B; 2815 n_vertices = pcbddc->n_vertices; 2816 2817 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2818 2819 /* create auxiliary bitmask and allocate workspace */ 2820 if (!sub_schurs->reuse_solver) { 2821 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2822 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2823 for (i=0;i<n_vertices;i++) { 2824 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2825 } 2826 2827 for (i=0, n_R=0; i<pcis->n; i++) { 2828 if (!PetscBTLookup(bitmask,i)) { 2829 idx_R_local[n_R++] = i; 2830 } 2831 } 2832 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2833 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2834 2835 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2836 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2837 } 2838 2839 /* Block code */ 2840 vbs = 1; 2841 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2842 if (bs>1 && !(n_vertices%bs)) { 2843 PetscBool is_blocked = PETSC_TRUE; 2844 PetscInt *vary; 2845 if (!sub_schurs->reuse_solver) { 2846 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2847 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2848 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2849 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2850 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2851 for (i=0; i<pcis->n/bs; i++) { 2852 if (vary[i]!=0 && vary[i]!=bs) { 2853 is_blocked = PETSC_FALSE; 2854 break; 2855 } 2856 } 2857 ierr = PetscFree(vary);CHKERRQ(ierr); 2858 } else { 2859 /* Verify directly the R set */ 2860 for (i=0; i<n_R/bs; i++) { 2861 PetscInt j,node=idx_R_local[bs*i]; 2862 for (j=1; j<bs; j++) { 2863 if (node != idx_R_local[bs*i+j]-j) { 2864 is_blocked = PETSC_FALSE; 2865 break; 2866 } 2867 } 2868 } 2869 } 2870 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2871 vbs = bs; 2872 for (i=0;i<n_R/vbs;i++) { 2873 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2874 } 2875 } 2876 } 2877 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2878 if (sub_schurs->reuse_solver) { 2879 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2880 2881 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2882 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2883 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2884 reuse_solver->is_R = pcbddc->is_R_local; 2885 } else { 2886 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2887 } 2888 2889 /* print some info if requested */ 2890 if (pcbddc->dbg_flag) { 2891 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2892 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2893 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 } 2899 2900 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2901 if (!sub_schurs->reuse_solver) { 2902 IS is_aux1,is_aux2; 2903 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2904 2905 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2906 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2907 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2908 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2909 for (i=0; i<n_D; i++) { 2910 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2911 } 2912 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2913 for (i=0, j=0; i<n_R; i++) { 2914 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2915 aux_array1[j++] = i; 2916 } 2917 } 2918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2919 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2920 for (i=0, j=0; i<n_B; i++) { 2921 if (!PetscBTLookup(bitmask,is_indices[i])) { 2922 aux_array2[j++] = i; 2923 } 2924 } 2925 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2926 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2927 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2928 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2929 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2930 2931 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2932 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2933 for (i=0, j=0; i<n_R; i++) { 2934 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2935 aux_array1[j++] = i; 2936 } 2937 } 2938 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2939 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2940 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2941 } 2942 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2943 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2944 } else { 2945 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2946 IS tis; 2947 PetscInt schur_size; 2948 2949 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2950 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2951 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2952 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2953 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2954 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2955 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2956 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2957 } 2958 } 2959 PetscFunctionReturn(0); 2960 } 2961 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2965 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2966 { 2967 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2968 PC_IS *pcis = (PC_IS*)pc->data; 2969 PC pc_temp; 2970 Mat A_RR; 2971 MatReuse reuse; 2972 PetscScalar m_one = -1.0; 2973 PetscReal value; 2974 PetscInt n_D,n_R; 2975 PetscBool use_exact,use_exact_reduced,issbaij; 2976 PetscErrorCode ierr; 2977 /* prefixes stuff */ 2978 char dir_prefix[256],neu_prefix[256],str_level[16]; 2979 size_t len; 2980 2981 PetscFunctionBegin; 2982 2983 /* compute prefixes */ 2984 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2985 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2986 if (!pcbddc->current_level) { 2987 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2988 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2989 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2990 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2991 } else { 2992 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2993 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2994 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2995 len -= 15; /* remove "pc_bddc_coarse_" */ 2996 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2997 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2998 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2999 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3000 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3001 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3002 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3003 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3004 } 3005 3006 /* DIRICHLET PROBLEM */ 3007 if (dirichlet) { 3008 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3009 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3010 if (!sub_schurs->reuse_solver) { 3011 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3012 } 3013 if (pcbddc->dbg_flag) { 3014 Mat A_IIn; 3015 3016 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3017 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3018 pcis->A_II = A_IIn; 3019 } 3020 } 3021 if (pcbddc->local_mat->symmetric_set) { 3022 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3023 } 3024 /* Matrix for Dirichlet problem is pcis->A_II */ 3025 n_D = pcis->n - pcis->n_B; 3026 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3027 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3028 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3029 /* default */ 3030 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3031 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3032 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3033 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3034 if (issbaij) { 3035 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3036 } else { 3037 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3038 } 3039 /* Allow user's customization */ 3040 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3041 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3042 } 3043 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3044 if (sub_schurs->reuse_solver) { 3045 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3046 3047 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3048 } 3049 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3050 if (!n_D) { 3051 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3052 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3053 } 3054 /* Set Up KSP for Dirichlet problem of BDDC */ 3055 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3056 /* set ksp_D into pcis data */ 3057 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3058 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3059 pcis->ksp_D = pcbddc->ksp_D; 3060 } 3061 3062 /* NEUMANN PROBLEM */ 3063 A_RR = 0; 3064 if (neumann) { 3065 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3066 PetscInt ibs,mbs; 3067 PetscBool issbaij; 3068 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3069 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3070 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3071 if (pcbddc->ksp_R) { /* already created ksp */ 3072 PetscInt nn_R; 3073 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3074 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3075 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3076 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3077 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3078 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3079 reuse = MAT_INITIAL_MATRIX; 3080 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3081 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3082 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3083 reuse = MAT_INITIAL_MATRIX; 3084 } else { /* safe to reuse the matrix */ 3085 reuse = MAT_REUSE_MATRIX; 3086 } 3087 } 3088 /* last check */ 3089 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3091 reuse = MAT_INITIAL_MATRIX; 3092 } 3093 } else { /* first time, so we need to create the matrix */ 3094 reuse = MAT_INITIAL_MATRIX; 3095 } 3096 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3097 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3098 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3099 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3100 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3101 if (matis->A == pcbddc->local_mat) { 3102 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3103 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3104 } else { 3105 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3106 } 3107 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3108 if (matis->A == pcbddc->local_mat) { 3109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3110 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3111 } else { 3112 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3113 } 3114 } 3115 /* extract A_RR */ 3116 if (sub_schurs->reuse_solver) { 3117 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3118 3119 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3120 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3121 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3122 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3123 } else { 3124 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3125 } 3126 } else { 3127 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3128 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3129 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3130 } 3131 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3132 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3133 } 3134 if (pcbddc->local_mat->symmetric_set) { 3135 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3136 } 3137 if (!pcbddc->ksp_R) { /* create object if not present */ 3138 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3139 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3140 /* default */ 3141 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3142 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3143 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3144 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3145 if (issbaij) { 3146 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3147 } else { 3148 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3149 } 3150 /* Allow user's customization */ 3151 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3152 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3153 } 3154 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3155 if (!n_R) { 3156 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3157 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3158 } 3159 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3160 /* Reuse solver if it is present */ 3161 if (sub_schurs->reuse_solver) { 3162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3163 3164 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3165 } 3166 /* Set Up KSP for Neumann problem of BDDC */ 3167 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3168 } 3169 3170 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 3171 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 3172 if (pcbddc->dbg_flag) { 3173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3174 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3175 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3176 } 3177 if (dirichlet) { /* Dirichlet */ 3178 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3179 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3180 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3181 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3182 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3183 /* need to be adapted? */ 3184 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3185 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3186 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 3187 /* print info */ 3188 if (pcbddc->dbg_flag) { 3189 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3190 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3191 } 3192 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3193 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3194 } 3195 } 3196 if (neumann) { /* Neumann */ 3197 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3198 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3199 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3200 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3201 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3202 /* need to be adapted? */ 3203 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3204 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3205 /* print info */ 3206 if (pcbddc->dbg_flag) { 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3208 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3209 } 3210 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3211 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3212 } 3213 } 3214 } 3215 /* free Neumann problem's matrix */ 3216 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3222 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3223 { 3224 PetscErrorCode ierr; 3225 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3226 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3227 3228 PetscFunctionBegin; 3229 if (!sub_schurs->reuse_solver) { 3230 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3231 } 3232 if (!pcbddc->switch_static) { 3233 if (applytranspose && pcbddc->local_auxmat1) { 3234 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3235 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3236 } 3237 if (!sub_schurs->reuse_solver) { 3238 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3240 } else { 3241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3242 3243 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3245 } 3246 } else { 3247 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3248 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3249 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3251 if (applytranspose && pcbddc->local_auxmat1) { 3252 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3253 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3254 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3255 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3256 } 3257 } 3258 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3259 if (applytranspose) { 3260 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3261 } else { 3262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3263 } 3264 } else { 3265 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3266 3267 if (applytranspose) { 3268 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3269 } else { 3270 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3271 } 3272 } 3273 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3274 if (!pcbddc->switch_static) { 3275 if (!sub_schurs->reuse_solver) { 3276 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3277 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3278 } else { 3279 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3280 3281 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3282 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3283 } 3284 if (!applytranspose && pcbddc->local_auxmat1) { 3285 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3286 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3287 } 3288 } else { 3289 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3290 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3291 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3292 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 if (!applytranspose && pcbddc->local_auxmat1) { 3294 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3295 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3296 } 3297 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3298 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3299 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3300 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3301 } 3302 PetscFunctionReturn(0); 3303 } 3304 3305 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3308 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3309 { 3310 PetscErrorCode ierr; 3311 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3312 PC_IS* pcis = (PC_IS*) (pc->data); 3313 const PetscScalar zero = 0.0; 3314 3315 PetscFunctionBegin; 3316 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3317 if (!pcbddc->benign_apply_coarse_only) { 3318 if (applytranspose) { 3319 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3320 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3321 } else { 3322 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3323 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3324 } 3325 } else { 3326 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3327 } 3328 3329 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3330 if (pcbddc->benign_n) { 3331 PetscScalar *array; 3332 PetscInt j; 3333 3334 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3335 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3336 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3337 } 3338 3339 /* start communications from local primal nodes to rhs of coarse solver */ 3340 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3341 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3342 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3343 3344 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3345 /* TODO remove null space when doing multilevel */ 3346 if (pcbddc->coarse_ksp) { 3347 Mat coarse_mat; 3348 Vec rhs,sol; 3349 MatNullSpace nullsp; 3350 PetscBool isbddc = PETSC_FALSE; 3351 3352 if (pcbddc->benign_have_null) { 3353 PC coarse_pc; 3354 3355 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3356 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3357 /* we need to propagate to coarser levels the need for a possible benign correction */ 3358 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3359 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3360 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3361 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3362 } 3363 } 3364 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3365 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3366 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3367 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3368 if (nullsp) { 3369 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3370 } 3371 if (applytranspose) { 3372 if (pcbddc->benign_apply_coarse_only) { 3373 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3374 } else { 3375 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3376 } 3377 } else { 3378 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3379 PC coarse_pc; 3380 3381 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3382 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3383 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3384 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3385 } else { 3386 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3387 } 3388 } 3389 /* we don't the benign correction at coarser levels anymore */ 3390 if (pcbddc->benign_have_null && isbddc) { 3391 PC coarse_pc; 3392 PC_BDDC* coarsepcbddc; 3393 3394 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3395 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3396 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3397 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3398 } 3399 if (nullsp) { 3400 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3401 } 3402 } 3403 3404 /* Local solution on R nodes */ 3405 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3406 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3407 } 3408 /* communications from coarse sol to local primal nodes */ 3409 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3410 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3411 3412 /* Sum contributions from the two levels */ 3413 if (!pcbddc->benign_apply_coarse_only) { 3414 if (applytranspose) { 3415 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3416 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3417 } else { 3418 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3419 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3420 } 3421 /* store p0 */ 3422 if (pcbddc->benign_n) { 3423 PetscScalar *array; 3424 PetscInt j; 3425 3426 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3427 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3428 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3429 } 3430 } else { /* expand the coarse solution */ 3431 if (applytranspose) { 3432 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3433 } else { 3434 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3435 } 3436 } 3437 PetscFunctionReturn(0); 3438 } 3439 3440 #undef __FUNCT__ 3441 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3442 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3443 { 3444 PetscErrorCode ierr; 3445 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3446 PetscScalar *array; 3447 Vec from,to; 3448 3449 PetscFunctionBegin; 3450 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3451 from = pcbddc->coarse_vec; 3452 to = pcbddc->vec1_P; 3453 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3454 Vec tvec; 3455 3456 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3457 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3458 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3459 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3460 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3461 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3462 } 3463 } else { /* from local to global -> put data in coarse right hand side */ 3464 from = pcbddc->vec1_P; 3465 to = pcbddc->coarse_vec; 3466 } 3467 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } 3470 3471 #undef __FUNCT__ 3472 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3473 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3474 { 3475 PetscErrorCode ierr; 3476 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3477 PetscScalar *array; 3478 Vec from,to; 3479 3480 PetscFunctionBegin; 3481 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3482 from = pcbddc->coarse_vec; 3483 to = pcbddc->vec1_P; 3484 } else { /* from local to global -> put data in coarse right hand side */ 3485 from = pcbddc->vec1_P; 3486 to = pcbddc->coarse_vec; 3487 } 3488 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3489 if (smode == SCATTER_FORWARD) { 3490 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3491 Vec tvec; 3492 3493 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3494 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3495 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3496 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3497 } 3498 } else { 3499 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3500 ierr = VecResetArray(from);CHKERRQ(ierr); 3501 } 3502 } 3503 PetscFunctionReturn(0); 3504 } 3505 3506 /* uncomment for testing purposes */ 3507 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3510 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3511 { 3512 PetscErrorCode ierr; 3513 PC_IS* pcis = (PC_IS*)(pc->data); 3514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3515 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3516 /* one and zero */ 3517 PetscScalar one=1.0,zero=0.0; 3518 /* space to store constraints and their local indices */ 3519 PetscScalar *constraints_data; 3520 PetscInt *constraints_idxs,*constraints_idxs_B; 3521 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3522 PetscInt *constraints_n; 3523 /* iterators */ 3524 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3525 /* BLAS integers */ 3526 PetscBLASInt lwork,lierr; 3527 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3528 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3529 /* reuse */ 3530 PetscInt olocal_primal_size,olocal_primal_size_cc; 3531 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3532 /* change of basis */ 3533 PetscBool qr_needed; 3534 PetscBT change_basis,qr_needed_idx; 3535 /* auxiliary stuff */ 3536 PetscInt *nnz,*is_indices; 3537 PetscInt ncc; 3538 /* some quantities */ 3539 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3540 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3541 3542 PetscFunctionBegin; 3543 /* Destroy Mat objects computed previously */ 3544 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3545 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3546 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3547 /* save info on constraints from previous setup (if any) */ 3548 olocal_primal_size = pcbddc->local_primal_size; 3549 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3550 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3551 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3552 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3553 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3554 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3555 3556 if (!pcbddc->adaptive_selection) { 3557 IS ISForVertices,*ISForFaces,*ISForEdges; 3558 MatNullSpace nearnullsp; 3559 const Vec *nearnullvecs; 3560 Vec *localnearnullsp; 3561 PetscScalar *array; 3562 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3563 PetscBool nnsp_has_cnst; 3564 /* LAPACK working arrays for SVD or POD */ 3565 PetscBool skip_lapack,boolforchange; 3566 PetscScalar *work; 3567 PetscReal *singular_vals; 3568 #if defined(PETSC_USE_COMPLEX) 3569 PetscReal *rwork; 3570 #endif 3571 #if defined(PETSC_MISSING_LAPACK_GESVD) 3572 PetscScalar *temp_basis,*correlation_mat; 3573 #else 3574 PetscBLASInt dummy_int=1; 3575 PetscScalar dummy_scalar=1.; 3576 #endif 3577 3578 /* Get index sets for faces, edges and vertices from graph */ 3579 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3580 /* print some info */ 3581 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3582 PetscInt nv; 3583 3584 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3585 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3586 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3592 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3593 } 3594 3595 /* free unneeded index sets */ 3596 if (!pcbddc->use_vertices) { 3597 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3598 } 3599 if (!pcbddc->use_edges) { 3600 for (i=0;i<n_ISForEdges;i++) { 3601 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3602 } 3603 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3604 n_ISForEdges = 0; 3605 } 3606 if (!pcbddc->use_faces) { 3607 for (i=0;i<n_ISForFaces;i++) { 3608 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3609 } 3610 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3611 n_ISForFaces = 0; 3612 } 3613 3614 #if defined(PETSC_USE_DEBUG) 3615 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3616 Also use_change_of_basis should be consistent among processors */ 3617 if (pcbddc->NullSpace) { 3618 PetscBool tbool[2],gbool[2]; 3619 3620 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3621 pcbddc->use_change_of_basis = PETSC_TRUE; 3622 if (!ISForEdges) { 3623 pcbddc->use_change_on_faces = PETSC_TRUE; 3624 } 3625 } 3626 tbool[0] = pcbddc->use_change_of_basis; 3627 tbool[1] = pcbddc->use_change_on_faces; 3628 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3629 pcbddc->use_change_of_basis = gbool[0]; 3630 pcbddc->use_change_on_faces = gbool[1]; 3631 } 3632 #endif 3633 3634 /* check if near null space is attached to global mat */ 3635 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3636 if (nearnullsp) { 3637 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3638 /* remove any stored info */ 3639 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3640 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3641 /* store information for BDDC solver reuse */ 3642 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3643 pcbddc->onearnullspace = nearnullsp; 3644 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3645 for (i=0;i<nnsp_size;i++) { 3646 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3647 } 3648 } else { /* if near null space is not provided BDDC uses constants by default */ 3649 nnsp_size = 0; 3650 nnsp_has_cnst = PETSC_TRUE; 3651 } 3652 /* get max number of constraints on a single cc */ 3653 max_constraints = nnsp_size; 3654 if (nnsp_has_cnst) max_constraints++; 3655 3656 /* 3657 Evaluate maximum storage size needed by the procedure 3658 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3659 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3660 There can be multiple constraints per connected component 3661 */ 3662 n_vertices = 0; 3663 if (ISForVertices) { 3664 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3665 } 3666 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3667 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3668 3669 total_counts = n_ISForFaces+n_ISForEdges; 3670 total_counts *= max_constraints; 3671 total_counts += n_vertices; 3672 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3673 3674 total_counts = 0; 3675 max_size_of_constraint = 0; 3676 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3677 IS used_is; 3678 if (i<n_ISForEdges) { 3679 used_is = ISForEdges[i]; 3680 } else { 3681 used_is = ISForFaces[i-n_ISForEdges]; 3682 } 3683 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3684 total_counts += j; 3685 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3686 } 3687 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3688 3689 /* get local part of global near null space vectors */ 3690 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3691 for (k=0;k<nnsp_size;k++) { 3692 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3693 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3694 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3695 } 3696 3697 /* whether or not to skip lapack calls */ 3698 skip_lapack = PETSC_TRUE; 3699 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3700 3701 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3702 if (!skip_lapack) { 3703 PetscScalar temp_work; 3704 3705 #if defined(PETSC_MISSING_LAPACK_GESVD) 3706 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3707 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3708 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3709 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3710 #if defined(PETSC_USE_COMPLEX) 3711 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3712 #endif 3713 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3714 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3715 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3716 lwork = -1; 3717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3718 #if !defined(PETSC_USE_COMPLEX) 3719 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3720 #else 3721 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3722 #endif 3723 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3724 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3725 #else /* on missing GESVD */ 3726 /* SVD */ 3727 PetscInt max_n,min_n; 3728 max_n = max_size_of_constraint; 3729 min_n = max_constraints; 3730 if (max_size_of_constraint < max_constraints) { 3731 min_n = max_size_of_constraint; 3732 max_n = max_constraints; 3733 } 3734 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3735 #if defined(PETSC_USE_COMPLEX) 3736 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3737 #endif 3738 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3739 lwork = -1; 3740 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3741 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3742 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3743 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3744 #if !defined(PETSC_USE_COMPLEX) 3745 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3746 #else 3747 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3748 #endif 3749 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3750 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3751 #endif /* on missing GESVD */ 3752 /* Allocate optimal workspace */ 3753 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3754 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3755 } 3756 /* Now we can loop on constraining sets */ 3757 total_counts = 0; 3758 constraints_idxs_ptr[0] = 0; 3759 constraints_data_ptr[0] = 0; 3760 /* vertices */ 3761 if (n_vertices) { 3762 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3763 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3764 for (i=0;i<n_vertices;i++) { 3765 constraints_n[total_counts] = 1; 3766 constraints_data[total_counts] = 1.0; 3767 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3768 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3769 total_counts++; 3770 } 3771 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3772 n_vertices = total_counts; 3773 } 3774 3775 /* edges and faces */ 3776 total_counts_cc = total_counts; 3777 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3778 IS used_is; 3779 PetscBool idxs_copied = PETSC_FALSE; 3780 3781 if (ncc<n_ISForEdges) { 3782 used_is = ISForEdges[ncc]; 3783 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3784 } else { 3785 used_is = ISForFaces[ncc-n_ISForEdges]; 3786 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3787 } 3788 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3789 3790 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3791 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3792 /* change of basis should not be performed on local periodic nodes */ 3793 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3794 if (nnsp_has_cnst) { 3795 PetscScalar quad_value; 3796 3797 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3798 idxs_copied = PETSC_TRUE; 3799 3800 if (!pcbddc->use_nnsp_true) { 3801 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3802 } else { 3803 quad_value = 1.0; 3804 } 3805 for (j=0;j<size_of_constraint;j++) { 3806 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3807 } 3808 temp_constraints++; 3809 total_counts++; 3810 } 3811 for (k=0;k<nnsp_size;k++) { 3812 PetscReal real_value; 3813 PetscScalar *ptr_to_data; 3814 3815 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3816 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3817 for (j=0;j<size_of_constraint;j++) { 3818 ptr_to_data[j] = array[is_indices[j]]; 3819 } 3820 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3821 /* check if array is null on the connected component */ 3822 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3823 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3824 if (real_value > 0.0) { /* keep indices and values */ 3825 temp_constraints++; 3826 total_counts++; 3827 if (!idxs_copied) { 3828 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3829 idxs_copied = PETSC_TRUE; 3830 } 3831 } 3832 } 3833 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3834 valid_constraints = temp_constraints; 3835 if (!pcbddc->use_nnsp_true && temp_constraints) { 3836 if (temp_constraints == 1) { /* just normalize the constraint */ 3837 PetscScalar norm,*ptr_to_data; 3838 3839 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3841 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3842 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3843 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3844 } else { /* perform SVD */ 3845 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3846 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3847 3848 #if defined(PETSC_MISSING_LAPACK_GESVD) 3849 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3850 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3851 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3852 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3853 from that computed using LAPACKgesvd 3854 -> This is due to a different computation of eigenvectors in LAPACKheev 3855 -> The quality of the POD-computed basis will be the same */ 3856 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3857 /* Store upper triangular part of correlation matrix */ 3858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3859 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3860 for (j=0;j<temp_constraints;j++) { 3861 for (k=0;k<j+1;k++) { 3862 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3863 } 3864 } 3865 /* compute eigenvalues and eigenvectors of correlation matrix */ 3866 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3867 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3868 #if !defined(PETSC_USE_COMPLEX) 3869 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3870 #else 3871 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3872 #endif 3873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3874 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3875 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3876 j = 0; 3877 while (j < temp_constraints && singular_vals[j] < tol) j++; 3878 total_counts = total_counts-j; 3879 valid_constraints = temp_constraints-j; 3880 /* scale and copy POD basis into used quadrature memory */ 3881 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3882 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3883 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3885 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3886 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3887 if (j<temp_constraints) { 3888 PetscInt ii; 3889 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3890 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3891 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3892 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3893 for (k=0;k<temp_constraints-j;k++) { 3894 for (ii=0;ii<size_of_constraint;ii++) { 3895 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3896 } 3897 } 3898 } 3899 #else /* on missing GESVD */ 3900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3901 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3904 #if !defined(PETSC_USE_COMPLEX) 3905 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3906 #else 3907 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3908 #endif 3909 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3910 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3911 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3912 k = temp_constraints; 3913 if (k > size_of_constraint) k = size_of_constraint; 3914 j = 0; 3915 while (j < k && singular_vals[k-j-1] < tol) j++; 3916 valid_constraints = k-j; 3917 total_counts = total_counts-temp_constraints+valid_constraints; 3918 #endif /* on missing GESVD */ 3919 } 3920 } 3921 /* update pointers information */ 3922 if (valid_constraints) { 3923 constraints_n[total_counts_cc] = valid_constraints; 3924 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3925 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3926 /* set change_of_basis flag */ 3927 if (boolforchange) { 3928 PetscBTSet(change_basis,total_counts_cc); 3929 } 3930 total_counts_cc++; 3931 } 3932 } 3933 /* free workspace */ 3934 if (!skip_lapack) { 3935 ierr = PetscFree(work);CHKERRQ(ierr); 3936 #if defined(PETSC_USE_COMPLEX) 3937 ierr = PetscFree(rwork);CHKERRQ(ierr); 3938 #endif 3939 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3940 #if defined(PETSC_MISSING_LAPACK_GESVD) 3941 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3942 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3943 #endif 3944 } 3945 for (k=0;k<nnsp_size;k++) { 3946 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3947 } 3948 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3949 /* free index sets of faces, edges and vertices */ 3950 for (i=0;i<n_ISForFaces;i++) { 3951 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3952 } 3953 if (n_ISForFaces) { 3954 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3955 } 3956 for (i=0;i<n_ISForEdges;i++) { 3957 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3958 } 3959 if (n_ISForEdges) { 3960 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3961 } 3962 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3963 } else { 3964 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3965 3966 total_counts = 0; 3967 n_vertices = 0; 3968 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3969 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3970 } 3971 max_constraints = 0; 3972 total_counts_cc = 0; 3973 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3974 total_counts += pcbddc->adaptive_constraints_n[i]; 3975 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3976 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3977 } 3978 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3979 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3980 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3981 constraints_data = pcbddc->adaptive_constraints_data; 3982 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3983 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3984 total_counts_cc = 0; 3985 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3986 if (pcbddc->adaptive_constraints_n[i]) { 3987 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3988 } 3989 } 3990 #if 0 3991 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3992 for (i=0;i<total_counts_cc;i++) { 3993 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3994 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3995 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3996 printf(" %d",constraints_idxs[j]); 3997 } 3998 printf("\n"); 3999 printf("number of cc: %d\n",constraints_n[i]); 4000 } 4001 for (i=0;i<n_vertices;i++) { 4002 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4003 } 4004 for (i=0;i<sub_schurs->n_subs;i++) { 4005 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4006 } 4007 #endif 4008 4009 max_size_of_constraint = 0; 4010 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4011 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4012 /* Change of basis */ 4013 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4014 if (pcbddc->use_change_of_basis) { 4015 for (i=0;i<sub_schurs->n_subs;i++) { 4016 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4017 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4018 } 4019 } 4020 } 4021 } 4022 pcbddc->local_primal_size = total_counts; 4023 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4024 4025 /* map constraints_idxs in boundary numbering */ 4026 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4027 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4028 4029 /* Create constraint matrix */ 4030 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4031 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4032 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4033 4034 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4035 /* determine if a QR strategy is needed for change of basis */ 4036 qr_needed = PETSC_FALSE; 4037 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4038 total_primal_vertices=0; 4039 pcbddc->local_primal_size_cc = 0; 4040 for (i=0;i<total_counts_cc;i++) { 4041 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4042 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4043 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4044 pcbddc->local_primal_size_cc += 1; 4045 } else if (PetscBTLookup(change_basis,i)) { 4046 for (k=0;k<constraints_n[i];k++) { 4047 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4048 } 4049 pcbddc->local_primal_size_cc += constraints_n[i]; 4050 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4051 PetscBTSet(qr_needed_idx,i); 4052 qr_needed = PETSC_TRUE; 4053 } 4054 } else { 4055 pcbddc->local_primal_size_cc += 1; 4056 } 4057 } 4058 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4059 pcbddc->n_vertices = total_primal_vertices; 4060 /* permute indices in order to have a sorted set of vertices */ 4061 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4062 4063 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4064 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4065 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4066 4067 /* nonzero structure of constraint matrix */ 4068 /* and get reference dof for local constraints */ 4069 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4070 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4071 4072 j = total_primal_vertices; 4073 total_counts = total_primal_vertices; 4074 cum = total_primal_vertices; 4075 for (i=n_vertices;i<total_counts_cc;i++) { 4076 if (!PetscBTLookup(change_basis,i)) { 4077 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4078 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4079 cum++; 4080 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4081 for (k=0;k<constraints_n[i];k++) { 4082 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4083 nnz[j+k] = size_of_constraint; 4084 } 4085 j += constraints_n[i]; 4086 } 4087 } 4088 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4089 ierr = PetscFree(nnz);CHKERRQ(ierr); 4090 4091 /* set values in constraint matrix */ 4092 for (i=0;i<total_primal_vertices;i++) { 4093 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4094 } 4095 total_counts = total_primal_vertices; 4096 for (i=n_vertices;i<total_counts_cc;i++) { 4097 if (!PetscBTLookup(change_basis,i)) { 4098 PetscInt *cols; 4099 4100 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4101 cols = constraints_idxs+constraints_idxs_ptr[i]; 4102 for (k=0;k<constraints_n[i];k++) { 4103 PetscInt row = total_counts+k; 4104 PetscScalar *vals; 4105 4106 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4107 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4108 } 4109 total_counts += constraints_n[i]; 4110 } 4111 } 4112 /* assembling */ 4113 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4114 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4115 4116 /* 4117 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4118 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4119 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4120 */ 4121 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4122 if (pcbddc->use_change_of_basis) { 4123 /* dual and primal dofs on a single cc */ 4124 PetscInt dual_dofs,primal_dofs; 4125 /* working stuff for GEQRF */ 4126 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4127 PetscBLASInt lqr_work; 4128 /* working stuff for UNGQR */ 4129 PetscScalar *gqr_work,lgqr_work_t; 4130 PetscBLASInt lgqr_work; 4131 /* working stuff for TRTRS */ 4132 PetscScalar *trs_rhs; 4133 PetscBLASInt Blas_NRHS; 4134 /* pointers for values insertion into change of basis matrix */ 4135 PetscInt *start_rows,*start_cols; 4136 PetscScalar *start_vals; 4137 /* working stuff for values insertion */ 4138 PetscBT is_primal; 4139 PetscInt *aux_primal_numbering_B; 4140 /* matrix sizes */ 4141 PetscInt global_size,local_size; 4142 /* temporary change of basis */ 4143 Mat localChangeOfBasisMatrix; 4144 /* extra space for debugging */ 4145 PetscScalar *dbg_work; 4146 4147 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4148 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4149 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4150 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4151 /* nonzeros for local mat */ 4152 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4153 if (!pcbddc->benign_change || pcbddc->fake_change) { 4154 for (i=0;i<pcis->n;i++) nnz[i]=1; 4155 } else { 4156 const PetscInt *ii; 4157 PetscInt n; 4158 PetscBool flg_row; 4159 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4160 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4161 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4162 } 4163 for (i=n_vertices;i<total_counts_cc;i++) { 4164 if (PetscBTLookup(change_basis,i)) { 4165 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4166 if (PetscBTLookup(qr_needed_idx,i)) { 4167 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4168 } else { 4169 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4170 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4171 } 4172 } 4173 } 4174 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4175 ierr = PetscFree(nnz);CHKERRQ(ierr); 4176 /* Set interior change in the matrix */ 4177 if (!pcbddc->benign_change || pcbddc->fake_change) { 4178 for (i=0;i<pcis->n;i++) { 4179 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4180 } 4181 } else { 4182 const PetscInt *ii,*jj; 4183 PetscScalar *aa; 4184 PetscInt n; 4185 PetscBool flg_row; 4186 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4187 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4188 for (i=0;i<n;i++) { 4189 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4190 } 4191 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4192 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4193 } 4194 4195 if (pcbddc->dbg_flag) { 4196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4197 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4198 } 4199 4200 4201 /* Now we loop on the constraints which need a change of basis */ 4202 /* 4203 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4204 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4205 4206 Basic blocks of change of basis matrix T computed by 4207 4208 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4209 4210 | 1 0 ... 0 s_1/S | 4211 | 0 1 ... 0 s_2/S | 4212 | ... | 4213 | 0 ... 1 s_{n-1}/S | 4214 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4215 4216 with S = \sum_{i=1}^n s_i^2 4217 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4218 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4219 4220 - QR decomposition of constraints otherwise 4221 */ 4222 if (qr_needed) { 4223 /* space to store Q */ 4224 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4225 /* first we issue queries for optimal work */ 4226 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4227 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4228 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4229 lqr_work = -1; 4230 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4231 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4232 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4233 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4234 lgqr_work = -1; 4235 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4236 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4237 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4238 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4239 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4240 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4241 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4242 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4243 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4244 /* array to store scaling factors for reflectors */ 4245 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4246 /* array to store rhs and solution of triangular solver */ 4247 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4248 /* allocating workspace for check */ 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4251 } 4252 } 4253 /* array to store whether a node is primal or not */ 4254 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4255 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4256 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4257 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4258 for (i=0;i<total_primal_vertices;i++) { 4259 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4260 } 4261 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4262 4263 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4264 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4265 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4266 if (PetscBTLookup(change_basis,total_counts)) { 4267 /* get constraint info */ 4268 primal_dofs = constraints_n[total_counts]; 4269 dual_dofs = size_of_constraint-primal_dofs; 4270 4271 if (pcbddc->dbg_flag) { 4272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4273 } 4274 4275 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4276 4277 /* copy quadrature constraints for change of basis check */ 4278 if (pcbddc->dbg_flag) { 4279 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4280 } 4281 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4282 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4283 4284 /* compute QR decomposition of constraints */ 4285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4286 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4288 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4289 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4290 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4291 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4292 4293 /* explictly compute R^-T */ 4294 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4295 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4296 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4297 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4299 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4300 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4301 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4302 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4303 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4304 4305 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4306 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4307 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4308 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4309 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4310 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4311 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4312 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4313 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4314 4315 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4316 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4317 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4318 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4319 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4320 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4321 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4322 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4324 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4325 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4326 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4327 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4328 4329 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4330 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4331 /* insert cols for primal dofs */ 4332 for (j=0;j<primal_dofs;j++) { 4333 start_vals = &qr_basis[j*size_of_constraint]; 4334 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4335 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4336 } 4337 /* insert cols for dual dofs */ 4338 for (j=0,k=0;j<dual_dofs;k++) { 4339 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4340 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4341 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4342 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4343 j++; 4344 } 4345 } 4346 4347 /* check change of basis */ 4348 if (pcbddc->dbg_flag) { 4349 PetscInt ii,jj; 4350 PetscBool valid_qr=PETSC_TRUE; 4351 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4352 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4353 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4354 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4355 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4356 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4357 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4358 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4359 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4360 for (jj=0;jj<size_of_constraint;jj++) { 4361 for (ii=0;ii<primal_dofs;ii++) { 4362 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4363 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4364 } 4365 } 4366 if (!valid_qr) { 4367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4368 for (jj=0;jj<size_of_constraint;jj++) { 4369 for (ii=0;ii<primal_dofs;ii++) { 4370 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4371 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4372 } 4373 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4374 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4375 } 4376 } 4377 } 4378 } else { 4379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4380 } 4381 } 4382 } else { /* simple transformation block */ 4383 PetscInt row,col; 4384 PetscScalar val,norm; 4385 4386 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4387 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4388 for (j=0;j<size_of_constraint;j++) { 4389 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4390 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4391 if (!PetscBTLookup(is_primal,row_B)) { 4392 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4393 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4394 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4395 } else { 4396 for (k=0;k<size_of_constraint;k++) { 4397 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4398 if (row != col) { 4399 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4400 } else { 4401 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4402 } 4403 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4404 } 4405 } 4406 } 4407 if (pcbddc->dbg_flag) { 4408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4409 } 4410 } 4411 } else { 4412 if (pcbddc->dbg_flag) { 4413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4414 } 4415 } 4416 } 4417 4418 /* free workspace */ 4419 if (qr_needed) { 4420 if (pcbddc->dbg_flag) { 4421 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4422 } 4423 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4424 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4425 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4426 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4427 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4428 } 4429 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4430 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4431 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4432 4433 /* assembling of global change of variable */ 4434 if (!pcbddc->fake_change) { 4435 Mat tmat; 4436 PetscInt bs; 4437 4438 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4439 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4440 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4441 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4442 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4443 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4444 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4445 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4446 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4447 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4448 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4449 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4450 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4451 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4452 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4454 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4455 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4456 4457 /* check */ 4458 if (pcbddc->dbg_flag) { 4459 PetscReal error; 4460 Vec x,x_change; 4461 4462 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4463 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4464 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4465 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4466 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4467 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4468 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4469 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4470 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4471 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4472 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4473 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4474 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4476 ierr = VecDestroy(&x);CHKERRQ(ierr); 4477 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4478 } 4479 /* adapt sub_schurs computed (if any) */ 4480 if (pcbddc->use_deluxe_scaling) { 4481 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4482 4483 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4484 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4485 } 4486 if (sub_schurs->S_Ej_all) { 4487 Mat S_new,tmat; 4488 IS is_all_N,is_V_Sall = NULL; 4489 4490 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4491 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4492 if (pcbddc->deluxe_zerorows) { 4493 ISLocalToGlobalMapping NtoSall; 4494 IS is_V; 4495 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4496 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4497 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4498 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4499 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4500 } 4501 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4502 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4503 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4504 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4505 if (pcbddc->deluxe_zerorows) { 4506 const PetscScalar *array; 4507 const PetscInt *idxs_V,*idxs_all; 4508 PetscInt i,n_V; 4509 4510 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4511 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4512 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4513 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4514 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4515 for (i=0;i<n_V;i++) { 4516 PetscScalar val; 4517 PetscInt idx; 4518 4519 idx = idxs_V[i]; 4520 val = array[idxs_all[idxs_V[i]]]; 4521 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4522 } 4523 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4524 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4526 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4527 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4528 } 4529 sub_schurs->S_Ej_all = S_new; 4530 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4531 if (sub_schurs->sum_S_Ej_all) { 4532 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4533 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4534 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4535 if (pcbddc->deluxe_zerorows) { 4536 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4537 } 4538 sub_schurs->sum_S_Ej_all = S_new; 4539 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4540 } 4541 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4542 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4543 } 4544 /* destroy any change of basis context in sub_schurs */ 4545 if (sub_schurs->change) { 4546 PetscInt i; 4547 4548 for (i=0;i<sub_schurs->n_subs;i++) { 4549 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4550 } 4551 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4552 } 4553 } 4554 if (pcbddc->switch_static) { /* need to save the local change */ 4555 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4556 } else { 4557 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4558 } 4559 /* determine if any process has changed the pressures locally */ 4560 pcbddc->change_interior = pcbddc->benign_have_null; 4561 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4562 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4563 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4564 pcbddc->use_qr_single = qr_needed; 4565 } 4566 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4567 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4568 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4569 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4570 } else { 4571 Mat benign_global = NULL; 4572 if (pcbddc->benign_have_null) { 4573 Mat tmat; 4574 4575 pcbddc->change_interior = PETSC_TRUE; 4576 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4577 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4578 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4579 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4580 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4581 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4582 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4584 if (pcbddc->benign_change) { 4585 Mat M; 4586 4587 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4588 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4589 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4590 ierr = MatDestroy(&M);CHKERRQ(ierr); 4591 } else { 4592 Mat eye; 4593 PetscScalar *array; 4594 4595 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4596 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4597 for (i=0;i<pcis->n;i++) { 4598 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4599 } 4600 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4601 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4604 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4605 } 4606 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4607 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4608 } 4609 if (pcbddc->user_ChangeOfBasisMatrix) { 4610 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4611 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4612 } else if (pcbddc->benign_have_null) { 4613 pcbddc->ChangeOfBasisMatrix = benign_global; 4614 } 4615 } 4616 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4617 IS is_global; 4618 const PetscInt *gidxs; 4619 4620 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4621 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4622 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4623 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4624 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4625 } 4626 } 4627 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4628 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4629 } 4630 4631 if (!pcbddc->fake_change) { 4632 /* add pressure dofs to set of primal nodes for numbering purposes */ 4633 for (i=0;i<pcbddc->benign_n;i++) { 4634 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4635 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4636 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4637 pcbddc->local_primal_size_cc++; 4638 pcbddc->local_primal_size++; 4639 } 4640 4641 /* check if a new primal space has been introduced (also take into account benign trick) */ 4642 pcbddc->new_primal_space_local = PETSC_TRUE; 4643 if (olocal_primal_size == pcbddc->local_primal_size) { 4644 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4645 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4646 if (!pcbddc->new_primal_space_local) { 4647 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4648 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4649 } 4650 } 4651 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4652 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4653 } 4654 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4655 4656 /* flush dbg viewer */ 4657 if (pcbddc->dbg_flag) { 4658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 } 4660 4661 /* free workspace */ 4662 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4663 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4664 if (!pcbddc->adaptive_selection) { 4665 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4666 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4667 } else { 4668 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4669 pcbddc->adaptive_constraints_idxs_ptr, 4670 pcbddc->adaptive_constraints_data_ptr, 4671 pcbddc->adaptive_constraints_idxs, 4672 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4673 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4674 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4675 } 4676 PetscFunctionReturn(0); 4677 } 4678 4679 #undef __FUNCT__ 4680 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4681 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4682 { 4683 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4684 PC_IS *pcis = (PC_IS*)pc->data; 4685 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4686 PetscInt ierr,i,N; 4687 4688 PetscFunctionBegin; 4689 /* Reset previously computed graph */ 4690 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4691 /* Init local Graph struct */ 4692 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4693 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4694 4695 /* Check validity of the csr graph passed in by the user */ 4696 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4697 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4698 } 4699 4700 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4701 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4702 PetscInt *xadj,*adjncy; 4703 PetscInt nvtxs; 4704 PetscBool flg_row=PETSC_FALSE; 4705 4706 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4707 if (flg_row) { 4708 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4709 pcbddc->computed_rowadj = PETSC_TRUE; 4710 } 4711 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4712 } 4713 if (pcbddc->dbg_flag) { 4714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4715 } 4716 4717 /* Setup of Graph */ 4718 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4719 4720 /* attach info on disconnected subdomains if present */ 4721 if (pcbddc->n_local_subs) { 4722 PetscInt *local_subs; 4723 4724 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4725 for (i=0;i<pcbddc->n_local_subs;i++) { 4726 const PetscInt *idxs; 4727 PetscInt nl,j; 4728 4729 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4730 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4731 for (j=0;j<nl;j++) { 4732 local_subs[idxs[j]] = i; 4733 } 4734 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4735 } 4736 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4737 pcbddc->mat_graph->local_subs = local_subs; 4738 } 4739 4740 /* Graph's connected components analysis */ 4741 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4742 PetscFunctionReturn(0); 4743 } 4744 4745 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4746 #undef __FUNCT__ 4747 #define __FUNCT__ "PCBDDCSubsetNumbering" 4748 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4749 { 4750 PetscSF sf; 4751 PetscLayout map; 4752 const PetscInt *idxs; 4753 PetscInt *leaf_data,*root_data,*gidxs; 4754 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4755 PetscInt n_n,nlocals,start,first_index; 4756 PetscMPIInt commsize; 4757 PetscBool first_found; 4758 PetscErrorCode ierr; 4759 4760 PetscFunctionBegin; 4761 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4762 if (subset_mult) { 4763 PetscCheckSameComm(subset,1,subset_mult,2); 4764 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4765 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4766 } 4767 /* create workspace layout for computing global indices of subset */ 4768 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4769 lbounds[0] = lbounds[1] = 0; 4770 for (i=0;i<n;i++) { 4771 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4772 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4773 } 4774 lbounds[0] = -lbounds[0]; 4775 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4776 gbounds[0] = -gbounds[0]; 4777 N = gbounds[1] - gbounds[0] + 1; 4778 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4779 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4780 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4781 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4782 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4783 4784 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4785 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4786 if (subset_mult) { 4787 const PetscInt* idxs_mult; 4788 4789 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4790 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4791 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4792 } else { 4793 for (i=0;i<n;i++) leaf_data[i] = 1; 4794 } 4795 /* local size of new subset */ 4796 n_n = 0; 4797 for (i=0;i<n;i++) n_n += leaf_data[i]; 4798 4799 /* global indexes in layout */ 4800 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4801 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4802 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4803 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4804 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4805 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4806 4807 /* reduce from leaves to roots */ 4808 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4809 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4810 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4811 4812 /* count indexes in local part of layout */ 4813 nlocals = 0; 4814 first_index = -1; 4815 first_found = PETSC_FALSE; 4816 for (i=0;i<Nl;i++) { 4817 if (!first_found && root_data[i]) { 4818 first_found = PETSC_TRUE; 4819 first_index = i; 4820 } 4821 nlocals += root_data[i]; 4822 } 4823 4824 /* cumulative of number of indexes and size of subset without holes */ 4825 #if defined(PETSC_HAVE_MPI_EXSCAN) 4826 start = 0; 4827 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4828 #else 4829 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4830 start = start-nlocals; 4831 #endif 4832 4833 if (N_n) { /* compute total size of new subset if requested */ 4834 *N_n = start + nlocals; 4835 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4836 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4837 } 4838 4839 /* adapt root data with cumulative */ 4840 if (first_found) { 4841 PetscInt old_index; 4842 4843 root_data[first_index] += start; 4844 old_index = first_index; 4845 for (i=first_index+1;i<Nl;i++) { 4846 if (root_data[i]) { 4847 root_data[i] += root_data[old_index]; 4848 old_index = i; 4849 } 4850 } 4851 } 4852 4853 /* from roots to leaves */ 4854 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4855 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4857 4858 /* create new IS with global indexes without holes */ 4859 if (subset_mult) { 4860 const PetscInt* idxs_mult; 4861 PetscInt cum; 4862 4863 cum = 0; 4864 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4865 for (i=0;i<n;i++) { 4866 PetscInt j; 4867 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4868 } 4869 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4870 } else { 4871 for (i=0;i<n;i++) { 4872 gidxs[i] = leaf_data[i]-1; 4873 } 4874 } 4875 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4876 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4877 PetscFunctionReturn(0); 4878 } 4879 4880 #undef __FUNCT__ 4881 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4882 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4883 { 4884 PetscInt i,j; 4885 PetscScalar *alphas; 4886 PetscErrorCode ierr; 4887 4888 PetscFunctionBegin; 4889 /* this implements stabilized Gram-Schmidt */ 4890 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4891 for (i=0;i<n;i++) { 4892 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4893 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4894 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4895 } 4896 ierr = PetscFree(alphas);CHKERRQ(ierr); 4897 PetscFunctionReturn(0); 4898 } 4899 4900 #undef __FUNCT__ 4901 #define __FUNCT__ "MatISGetSubassemblingPattern" 4902 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4903 { 4904 Mat A; 4905 PetscInt n_neighs,*neighs,*n_shared,**shared; 4906 PetscMPIInt size,rank,color; 4907 PetscInt *xadj,*adjncy; 4908 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4909 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4910 PetscInt void_procs,*procs_candidates = NULL; 4911 PetscInt xadj_count, *count; 4912 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4913 PetscSubcomm psubcomm; 4914 MPI_Comm subcomm; 4915 PetscErrorCode ierr; 4916 4917 PetscFunctionBegin; 4918 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4919 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4920 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4921 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4922 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4923 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4924 4925 if (have_void) *have_void = PETSC_FALSE; 4926 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4927 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4928 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4929 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4930 im_active = !!(n); 4931 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4932 void_procs = size - active_procs; 4933 /* get ranks of of non-active processes in mat communicator */ 4934 if (void_procs) { 4935 PetscInt ncand; 4936 4937 if (have_void) *have_void = PETSC_TRUE; 4938 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4939 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4940 for (i=0,ncand=0;i<size;i++) { 4941 if (!procs_candidates[i]) { 4942 procs_candidates[ncand++] = i; 4943 } 4944 } 4945 /* force n_subdomains to be not greater that the number of non-active processes */ 4946 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4947 } 4948 4949 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4950 if (active_procs < *n_subdomains) { 4951 PetscInt issize,isidx; 4952 if (im_active) { 4953 issize = 1; 4954 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4955 isidx = procs_candidates[rank]; 4956 } else { 4957 isidx = rank; 4958 } 4959 } else { 4960 issize = 0; 4961 isidx = -1; 4962 } 4963 *n_subdomains = active_procs; 4964 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4965 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4966 PetscFunctionReturn(0); 4967 } 4968 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4969 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4970 threshold = PetscMax(threshold,2); 4971 4972 /* Get info on mapping */ 4973 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4974 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4975 4976 /* build local CSR graph of subdomains' connectivity */ 4977 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4978 xadj[0] = 0; 4979 xadj[1] = PetscMax(n_neighs-1,0); 4980 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4981 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4982 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4983 for (i=1;i<n_neighs;i++) 4984 for (j=0;j<n_shared[i];j++) 4985 count[shared[i][j]] += 1; 4986 4987 xadj_count = 0; 4988 for (i=1;i<n_neighs;i++) { 4989 for (j=0;j<n_shared[i];j++) { 4990 if (count[shared[i][j]] < threshold) { 4991 adjncy[xadj_count] = neighs[i]; 4992 adjncy_wgt[xadj_count] = n_shared[i]; 4993 xadj_count++; 4994 break; 4995 } 4996 } 4997 } 4998 xadj[1] = xadj_count; 4999 ierr = PetscFree(count);CHKERRQ(ierr); 5000 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5001 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5002 5003 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5004 5005 /* Restrict work on active processes only */ 5006 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5007 if (void_procs) { 5008 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5009 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5010 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5011 subcomm = PetscSubcommChild(psubcomm); 5012 } else { 5013 psubcomm = NULL; 5014 subcomm = PetscObjectComm((PetscObject)mat); 5015 } 5016 5017 v_wgt = NULL; 5018 if (!color) { 5019 ierr = PetscFree(xadj);CHKERRQ(ierr); 5020 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5021 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5022 } else { 5023 Mat subdomain_adj; 5024 IS new_ranks,new_ranks_contig; 5025 MatPartitioning partitioner; 5026 PetscInt rstart=0,rend=0; 5027 PetscInt *is_indices,*oldranks; 5028 PetscMPIInt size; 5029 PetscBool aggregate; 5030 5031 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5032 if (void_procs) { 5033 PetscInt prank = rank; 5034 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5035 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5036 for (i=0;i<xadj[1];i++) { 5037 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5038 } 5039 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5040 } else { 5041 oldranks = NULL; 5042 } 5043 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5044 if (aggregate) { /* TODO: all this part could be made more efficient */ 5045 PetscInt lrows,row,ncols,*cols; 5046 PetscMPIInt nrank; 5047 PetscScalar *vals; 5048 5049 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5050 lrows = 0; 5051 if (nrank<redprocs) { 5052 lrows = size/redprocs; 5053 if (nrank<size%redprocs) lrows++; 5054 } 5055 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5056 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5057 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5058 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5059 row = nrank; 5060 ncols = xadj[1]-xadj[0]; 5061 cols = adjncy; 5062 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5063 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5064 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5065 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5066 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5067 ierr = PetscFree(xadj);CHKERRQ(ierr); 5068 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5069 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5070 ierr = PetscFree(vals);CHKERRQ(ierr); 5071 if (use_vwgt) { 5072 Vec v; 5073 const PetscScalar *array; 5074 PetscInt nl; 5075 5076 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5077 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5078 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5079 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5080 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5081 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5082 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5083 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5084 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5085 ierr = VecDestroy(&v);CHKERRQ(ierr); 5086 } 5087 } else { 5088 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5089 if (use_vwgt) { 5090 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5091 v_wgt[0] = local_size; 5092 } 5093 } 5094 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5095 5096 /* Partition */ 5097 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5098 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5099 if (v_wgt) { 5100 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5101 } 5102 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5103 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5104 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5105 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5106 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5107 5108 /* renumber new_ranks to avoid "holes" in new set of processors */ 5109 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5110 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5111 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5112 if (!aggregate) { 5113 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5114 #if defined(PETSC_USE_DEBUG) 5115 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5116 #endif 5117 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5118 } else if (oldranks) { 5119 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5120 } else { 5121 ranks_send_to_idx[0] = is_indices[0]; 5122 } 5123 } else { 5124 PetscInt idxs[1]; 5125 PetscMPIInt tag; 5126 MPI_Request *reqs; 5127 5128 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5129 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5130 for (i=rstart;i<rend;i++) { 5131 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5132 } 5133 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5134 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5135 ierr = PetscFree(reqs);CHKERRQ(ierr); 5136 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5137 #if defined(PETSC_USE_DEBUG) 5138 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5139 #endif 5140 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5141 } else if (oldranks) { 5142 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5143 } else { 5144 ranks_send_to_idx[0] = idxs[0]; 5145 } 5146 } 5147 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5148 /* clean up */ 5149 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5150 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5151 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5152 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5153 } 5154 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5155 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5156 5157 /* assemble parallel IS for sends */ 5158 i = 1; 5159 if (!color) i=0; 5160 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5161 PetscFunctionReturn(0); 5162 } 5163 5164 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5165 5166 #undef __FUNCT__ 5167 #define __FUNCT__ "MatISSubassemble" 5168 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5169 { 5170 Mat local_mat; 5171 IS is_sends_internal; 5172 PetscInt rows,cols,new_local_rows; 5173 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5174 PetscBool ismatis,isdense,newisdense,destroy_mat; 5175 ISLocalToGlobalMapping l2gmap; 5176 PetscInt* l2gmap_indices; 5177 const PetscInt* is_indices; 5178 MatType new_local_type; 5179 /* buffers */ 5180 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5181 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5182 PetscInt *recv_buffer_idxs_local; 5183 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5184 /* MPI */ 5185 MPI_Comm comm,comm_n; 5186 PetscSubcomm subcomm; 5187 PetscMPIInt n_sends,n_recvs,commsize; 5188 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5189 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5190 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5191 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5192 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5193 PetscErrorCode ierr; 5194 5195 PetscFunctionBegin; 5196 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5197 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5198 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5199 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5200 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5201 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5202 PetscValidLogicalCollectiveBool(mat,reuse,6); 5203 PetscValidLogicalCollectiveInt(mat,nis,8); 5204 5205 /* further checks */ 5206 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5207 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5208 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5209 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5210 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5211 if (reuse && *mat_n) { 5212 PetscInt mrows,mcols,mnrows,mncols; 5213 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5214 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5215 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5216 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5217 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5218 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5219 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5220 } 5221 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5222 PetscValidLogicalCollectiveInt(mat,bs,0); 5223 5224 /* prepare IS for sending if not provided */ 5225 if (!is_sends) { 5226 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5227 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5228 } else { 5229 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5230 is_sends_internal = is_sends; 5231 } 5232 5233 /* get comm */ 5234 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5235 5236 /* compute number of sends */ 5237 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5238 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5239 5240 /* compute number of receives */ 5241 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5242 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5243 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5244 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5245 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5246 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5247 ierr = PetscFree(iflags);CHKERRQ(ierr); 5248 5249 /* restrict comm if requested */ 5250 subcomm = 0; 5251 destroy_mat = PETSC_FALSE; 5252 if (restrict_comm) { 5253 PetscMPIInt color,subcommsize; 5254 5255 color = 0; 5256 if (restrict_full) { 5257 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5258 } else { 5259 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5260 } 5261 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5262 subcommsize = commsize - subcommsize; 5263 /* check if reuse has been requested */ 5264 if (reuse) { 5265 if (*mat_n) { 5266 PetscMPIInt subcommsize2; 5267 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5268 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5269 comm_n = PetscObjectComm((PetscObject)*mat_n); 5270 } else { 5271 comm_n = PETSC_COMM_SELF; 5272 } 5273 } else { /* MAT_INITIAL_MATRIX */ 5274 PetscMPIInt rank; 5275 5276 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5277 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5278 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5279 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5280 comm_n = PetscSubcommChild(subcomm); 5281 } 5282 /* flag to destroy *mat_n if not significative */ 5283 if (color) destroy_mat = PETSC_TRUE; 5284 } else { 5285 comm_n = comm; 5286 } 5287 5288 /* prepare send/receive buffers */ 5289 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5290 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5291 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5292 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5293 if (nis) { 5294 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5295 } 5296 5297 /* Get data from local matrices */ 5298 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5299 /* TODO: See below some guidelines on how to prepare the local buffers */ 5300 /* 5301 send_buffer_vals should contain the raw values of the local matrix 5302 send_buffer_idxs should contain: 5303 - MatType_PRIVATE type 5304 - PetscInt size_of_l2gmap 5305 - PetscInt global_row_indices[size_of_l2gmap] 5306 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5307 */ 5308 else { 5309 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5310 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5311 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5312 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5313 send_buffer_idxs[1] = i; 5314 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5315 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5316 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5317 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5318 for (i=0;i<n_sends;i++) { 5319 ilengths_vals[is_indices[i]] = len*len; 5320 ilengths_idxs[is_indices[i]] = len+2; 5321 } 5322 } 5323 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5324 /* additional is (if any) */ 5325 if (nis) { 5326 PetscMPIInt psum; 5327 PetscInt j; 5328 for (j=0,psum=0;j<nis;j++) { 5329 PetscInt plen; 5330 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5331 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5332 psum += len+1; /* indices + lenght */ 5333 } 5334 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5335 for (j=0,psum=0;j<nis;j++) { 5336 PetscInt plen; 5337 const PetscInt *is_array_idxs; 5338 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5339 send_buffer_idxs_is[psum] = plen; 5340 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5341 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5342 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5343 psum += plen+1; /* indices + lenght */ 5344 } 5345 for (i=0;i<n_sends;i++) { 5346 ilengths_idxs_is[is_indices[i]] = psum; 5347 } 5348 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5349 } 5350 5351 buf_size_idxs = 0; 5352 buf_size_vals = 0; 5353 buf_size_idxs_is = 0; 5354 for (i=0;i<n_recvs;i++) { 5355 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5356 buf_size_vals += (PetscInt)olengths_vals[i]; 5357 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5358 } 5359 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5360 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5361 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5362 5363 /* get new tags for clean communications */ 5364 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5365 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5366 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5367 5368 /* allocate for requests */ 5369 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5370 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5371 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5372 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5373 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5374 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5375 5376 /* communications */ 5377 ptr_idxs = recv_buffer_idxs; 5378 ptr_vals = recv_buffer_vals; 5379 ptr_idxs_is = recv_buffer_idxs_is; 5380 for (i=0;i<n_recvs;i++) { 5381 source_dest = onodes[i]; 5382 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5383 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5384 ptr_idxs += olengths_idxs[i]; 5385 ptr_vals += olengths_vals[i]; 5386 if (nis) { 5387 source_dest = onodes_is[i]; 5388 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5389 ptr_idxs_is += olengths_idxs_is[i]; 5390 } 5391 } 5392 for (i=0;i<n_sends;i++) { 5393 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5394 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5395 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5396 if (nis) { 5397 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5398 } 5399 } 5400 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5401 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5402 5403 /* assemble new l2g map */ 5404 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5405 ptr_idxs = recv_buffer_idxs; 5406 new_local_rows = 0; 5407 for (i=0;i<n_recvs;i++) { 5408 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5409 ptr_idxs += olengths_idxs[i]; 5410 } 5411 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5412 ptr_idxs = recv_buffer_idxs; 5413 new_local_rows = 0; 5414 for (i=0;i<n_recvs;i++) { 5415 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5416 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5417 ptr_idxs += olengths_idxs[i]; 5418 } 5419 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5420 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5421 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5422 5423 /* infer new local matrix type from received local matrices type */ 5424 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5425 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5426 if (n_recvs) { 5427 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5428 ptr_idxs = recv_buffer_idxs; 5429 for (i=0;i<n_recvs;i++) { 5430 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5431 new_local_type_private = MATAIJ_PRIVATE; 5432 break; 5433 } 5434 ptr_idxs += olengths_idxs[i]; 5435 } 5436 switch (new_local_type_private) { 5437 case MATDENSE_PRIVATE: 5438 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5439 new_local_type = MATSEQAIJ; 5440 bs = 1; 5441 } else { /* if I receive only 1 dense matrix */ 5442 new_local_type = MATSEQDENSE; 5443 bs = 1; 5444 } 5445 break; 5446 case MATAIJ_PRIVATE: 5447 new_local_type = MATSEQAIJ; 5448 bs = 1; 5449 break; 5450 case MATBAIJ_PRIVATE: 5451 new_local_type = MATSEQBAIJ; 5452 break; 5453 case MATSBAIJ_PRIVATE: 5454 new_local_type = MATSEQSBAIJ; 5455 break; 5456 default: 5457 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5458 break; 5459 } 5460 } else { /* by default, new_local_type is seqdense */ 5461 new_local_type = MATSEQDENSE; 5462 bs = 1; 5463 } 5464 5465 /* create MATIS object if needed */ 5466 if (!reuse) { 5467 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5468 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5469 } else { 5470 /* it also destroys the local matrices */ 5471 if (*mat_n) { 5472 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5473 } else { /* this is a fake object */ 5474 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5475 } 5476 } 5477 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5478 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5479 5480 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5481 5482 /* Global to local map of received indices */ 5483 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5484 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5485 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5486 5487 /* restore attributes -> type of incoming data and its size */ 5488 buf_size_idxs = 0; 5489 for (i=0;i<n_recvs;i++) { 5490 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5491 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5492 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5493 } 5494 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5495 5496 /* set preallocation */ 5497 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5498 if (!newisdense) { 5499 PetscInt *new_local_nnz=0; 5500 5501 ptr_idxs = recv_buffer_idxs_local; 5502 if (n_recvs) { 5503 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5504 } 5505 for (i=0;i<n_recvs;i++) { 5506 PetscInt j; 5507 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5508 for (j=0;j<*(ptr_idxs+1);j++) { 5509 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5510 } 5511 } else { 5512 /* TODO */ 5513 } 5514 ptr_idxs += olengths_idxs[i]; 5515 } 5516 if (new_local_nnz) { 5517 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5518 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5519 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5520 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5521 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5522 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5523 } else { 5524 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5525 } 5526 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5527 } else { 5528 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5529 } 5530 5531 /* set values */ 5532 ptr_vals = recv_buffer_vals; 5533 ptr_idxs = recv_buffer_idxs_local; 5534 for (i=0;i<n_recvs;i++) { 5535 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5536 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5537 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5538 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5539 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5540 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5541 } else { 5542 /* TODO */ 5543 } 5544 ptr_idxs += olengths_idxs[i]; 5545 ptr_vals += olengths_vals[i]; 5546 } 5547 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5548 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5549 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5550 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5551 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5552 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5553 5554 #if 0 5555 if (!restrict_comm) { /* check */ 5556 Vec lvec,rvec; 5557 PetscReal infty_error; 5558 5559 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5560 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5561 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5562 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5563 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5564 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5565 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5566 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5567 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5568 } 5569 #endif 5570 5571 /* assemble new additional is (if any) */ 5572 if (nis) { 5573 PetscInt **temp_idxs,*count_is,j,psum; 5574 5575 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5576 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5577 ptr_idxs = recv_buffer_idxs_is; 5578 psum = 0; 5579 for (i=0;i<n_recvs;i++) { 5580 for (j=0;j<nis;j++) { 5581 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5582 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5583 psum += plen; 5584 ptr_idxs += plen+1; /* shift pointer to received data */ 5585 } 5586 } 5587 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5588 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5589 for (i=1;i<nis;i++) { 5590 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5591 } 5592 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5593 ptr_idxs = recv_buffer_idxs_is; 5594 for (i=0;i<n_recvs;i++) { 5595 for (j=0;j<nis;j++) { 5596 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5597 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5598 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5599 ptr_idxs += plen+1; /* shift pointer to received data */ 5600 } 5601 } 5602 for (i=0;i<nis;i++) { 5603 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5604 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5605 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5606 } 5607 ierr = PetscFree(count_is);CHKERRQ(ierr); 5608 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5609 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5610 } 5611 /* free workspace */ 5612 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5613 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5614 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5615 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5616 if (isdense) { 5617 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5618 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5619 } else { 5620 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5621 } 5622 if (nis) { 5623 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5624 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5625 } 5626 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5627 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5628 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5629 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5630 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5631 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5632 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5633 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5634 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5635 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5636 ierr = PetscFree(onodes);CHKERRQ(ierr); 5637 if (nis) { 5638 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5639 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5640 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5641 } 5642 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5643 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5644 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5645 for (i=0;i<nis;i++) { 5646 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5647 } 5648 *mat_n = NULL; 5649 } 5650 PetscFunctionReturn(0); 5651 } 5652 5653 /* temporary hack into ksp private data structure */ 5654 #include <petsc/private/kspimpl.h> 5655 5656 #undef __FUNCT__ 5657 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5658 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5659 { 5660 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5661 PC_IS *pcis = (PC_IS*)pc->data; 5662 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5663 MatNullSpace CoarseNullSpace=NULL; 5664 ISLocalToGlobalMapping coarse_islg; 5665 IS coarse_is,*isarray; 5666 PetscInt i,im_active=-1,active_procs=-1; 5667 PetscInt nis,nisdofs,nisneu,nisvert; 5668 PC pc_temp; 5669 PCType coarse_pc_type; 5670 KSPType coarse_ksp_type; 5671 PetscBool multilevel_requested,multilevel_allowed; 5672 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5673 Mat t_coarse_mat_is; 5674 PetscInt ncoarse; 5675 PetscBool compute_vecs = PETSC_FALSE; 5676 PetscScalar *array; 5677 MatReuse coarse_mat_reuse; 5678 PetscBool restr, full_restr, have_void; 5679 PetscErrorCode ierr; 5680 5681 PetscFunctionBegin; 5682 /* Assign global numbering to coarse dofs */ 5683 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5684 PetscInt ocoarse_size; 5685 compute_vecs = PETSC_TRUE; 5686 ocoarse_size = pcbddc->coarse_size; 5687 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5688 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5689 /* see if we can avoid some work */ 5690 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5691 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5692 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5693 PC pc; 5694 PetscBool isbddc; 5695 5696 /* temporary workaround since PCBDDC does not have a reset method so far */ 5697 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5698 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5699 if (isbddc) { 5700 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5701 } else { 5702 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5703 } 5704 coarse_reuse = PETSC_FALSE; 5705 } else { /* we can safely reuse already computed coarse matrix */ 5706 coarse_reuse = PETSC_TRUE; 5707 } 5708 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5709 coarse_reuse = PETSC_FALSE; 5710 } 5711 /* reset any subassembling information */ 5712 if (!coarse_reuse || pcbddc->recompute_topography) { 5713 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5714 } 5715 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5716 coarse_reuse = PETSC_TRUE; 5717 } 5718 /* assemble coarse matrix */ 5719 if (coarse_reuse && pcbddc->coarse_ksp) { 5720 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5721 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5722 coarse_mat_reuse = MAT_REUSE_MATRIX; 5723 } else { 5724 coarse_mat = NULL; 5725 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5726 } 5727 5728 /* creates temporary l2gmap and IS for coarse indexes */ 5729 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5730 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5731 5732 /* creates temporary MATIS object for coarse matrix */ 5733 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5734 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5735 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5736 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5737 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5738 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5739 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5740 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5741 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5742 5743 /* count "active" (i.e. with positive local size) and "void" processes */ 5744 im_active = !!(pcis->n); 5745 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5746 5747 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5748 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5749 /* full_restr : just use the receivers from the subassembling pattern */ 5750 coarse_mat_is = NULL; 5751 multilevel_allowed = PETSC_FALSE; 5752 multilevel_requested = PETSC_FALSE; 5753 full_restr = PETSC_TRUE; 5754 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5755 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5756 if (multilevel_requested) { 5757 ncoarse = active_procs/pcbddc->coarsening_ratio; 5758 restr = PETSC_FALSE; 5759 full_restr = PETSC_FALSE; 5760 } else { 5761 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5762 restr = PETSC_TRUE; 5763 full_restr = PETSC_TRUE; 5764 } 5765 ncoarse = PetscMax(1,ncoarse); 5766 if (!pcbddc->coarse_subassembling) { 5767 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5768 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5769 PetscInt psum; 5770 PetscMPIInt size; 5771 if (pcbddc->coarse_ksp) psum = 1; 5772 else psum = 0; 5773 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5774 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5775 if (ncoarse < size) have_void = PETSC_TRUE; 5776 } 5777 /* determine if we can go multilevel */ 5778 if (multilevel_requested) { 5779 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5780 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5781 } 5782 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5783 5784 /* dump subassembling pattern */ 5785 if (pcbddc->dbg_flag && multilevel_allowed) { 5786 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5787 } 5788 5789 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5790 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5791 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5792 const PetscInt *idxs; 5793 ISLocalToGlobalMapping tmap; 5794 5795 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5796 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5797 /* allocate space for temporary storage */ 5798 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5799 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5800 /* allocate for IS array */ 5801 nisdofs = pcbddc->n_ISForDofsLocal; 5802 nisneu = !!pcbddc->NeumannBoundariesLocal; 5803 nisvert = 0; /* nisvert is not used */ 5804 nis = nisdofs + nisneu + nisvert; 5805 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5806 /* dofs splitting */ 5807 for (i=0;i<nisdofs;i++) { 5808 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5809 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5810 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5811 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5812 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5813 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5814 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5815 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5816 } 5817 /* neumann boundaries */ 5818 if (pcbddc->NeumannBoundariesLocal) { 5819 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5820 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5821 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5822 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5823 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5824 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5825 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5826 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5827 } 5828 /* free memory */ 5829 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5830 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5831 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5832 } else { 5833 nis = 0; 5834 nisdofs = 0; 5835 nisneu = 0; 5836 nisvert = 0; 5837 isarray = NULL; 5838 } 5839 /* destroy no longer needed map */ 5840 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5841 5842 /* subassemble */ 5843 if (multilevel_allowed) { 5844 PetscBool reuse,reuser; 5845 if (coarse_mat) reuse = PETSC_TRUE; 5846 else reuse = PETSC_FALSE; 5847 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5848 if (reuser) { 5849 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5850 } else { 5851 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5852 } 5853 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5854 } else { 5855 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5856 } 5857 if (coarse_mat_is || coarse_mat) { 5858 PetscMPIInt size; 5859 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5860 if (!multilevel_allowed) { 5861 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5862 } else { 5863 Mat A; 5864 5865 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5866 if (coarse_mat_is) { 5867 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5868 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5869 coarse_mat = coarse_mat_is; 5870 } 5871 /* be sure we don't have MatSeqDENSE as local mat */ 5872 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5873 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5874 } 5875 } 5876 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5877 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5878 5879 /* create local to global scatters for coarse problem */ 5880 if (compute_vecs) { 5881 PetscInt lrows; 5882 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5883 if (coarse_mat) { 5884 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5885 } else { 5886 lrows = 0; 5887 } 5888 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5889 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5890 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5891 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5892 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5893 } 5894 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5895 5896 /* set defaults for coarse KSP and PC */ 5897 if (multilevel_allowed) { 5898 coarse_ksp_type = KSPRICHARDSON; 5899 coarse_pc_type = PCBDDC; 5900 } else { 5901 coarse_ksp_type = KSPPREONLY; 5902 coarse_pc_type = PCREDUNDANT; 5903 } 5904 5905 /* print some info if requested */ 5906 if (pcbddc->dbg_flag) { 5907 if (!multilevel_allowed) { 5908 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5909 if (multilevel_requested) { 5910 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5911 } else if (pcbddc->max_levels) { 5912 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5913 } 5914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5915 } 5916 } 5917 5918 /* create the coarse KSP object only once with defaults */ 5919 if (coarse_mat) { 5920 PetscViewer dbg_viewer = NULL; 5921 if (pcbddc->dbg_flag) { 5922 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5923 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5924 } 5925 if (!pcbddc->coarse_ksp) { 5926 char prefix[256],str_level[16]; 5927 size_t len; 5928 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5929 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5930 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5931 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5932 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5933 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5934 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5935 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5936 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5937 /* prefix */ 5938 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5939 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5940 if (!pcbddc->current_level) { 5941 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5942 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5943 } else { 5944 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5945 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5946 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5947 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5948 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5949 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5950 } 5951 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5952 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5953 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5954 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5955 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5956 /* allow user customization */ 5957 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5958 } 5959 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5960 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5961 if (nisdofs) { 5962 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5963 for (i=0;i<nisdofs;i++) { 5964 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5965 } 5966 } 5967 if (nisneu) { 5968 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5969 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5970 } 5971 if (nisvert) { 5972 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5973 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5974 } 5975 5976 /* get some info after set from options */ 5977 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5978 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5979 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5980 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5981 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5982 isbddc = PETSC_FALSE; 5983 } 5984 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5985 if (isredundant) { 5986 KSP inner_ksp; 5987 PC inner_pc; 5988 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5989 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5990 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5991 } 5992 5993 /* parameters which miss an API */ 5994 if (isbddc) { 5995 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5996 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5997 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5998 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5999 if (pcbddc_coarse->benign_saddle_point) { 6000 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 6001 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6002 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6003 } 6004 } 6005 6006 /* propagate symmetry info of coarse matrix */ 6007 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6008 if (pc->pmat->symmetric_set) { 6009 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6010 } 6011 if (pc->pmat->hermitian_set) { 6012 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6013 } 6014 if (pc->pmat->spd_set) { 6015 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6016 } 6017 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6018 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6019 } 6020 /* set operators */ 6021 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6022 if (pcbddc->dbg_flag) { 6023 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6024 } 6025 } 6026 ierr = PetscFree(isarray);CHKERRQ(ierr); 6027 #if 0 6028 { 6029 PetscViewer viewer; 6030 char filename[256]; 6031 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6032 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6033 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6034 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6035 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6036 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6037 } 6038 #endif 6039 6040 /* Compute coarse null space (special handling by BDDC only) */ 6041 #if 0 6042 if (pcbddc->NullSpace) { 6043 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 6044 } 6045 #endif 6046 /* hack */ 6047 if (pcbddc->coarse_ksp) { 6048 Vec crhs,csol; 6049 6050 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6051 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6052 if (!csol) { 6053 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6054 } 6055 if (!crhs) { 6056 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6057 } 6058 } 6059 6060 /* compute null space for coarse solver if the benign trick has been requested */ 6061 if (pcbddc->benign_null) { 6062 6063 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6064 for (i=0;i<pcbddc->benign_n;i++) { 6065 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6066 } 6067 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6068 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6071 if (coarse_mat) { 6072 Vec nullv; 6073 PetscScalar *array,*array2; 6074 PetscInt nl; 6075 6076 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6077 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6078 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6079 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6080 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6081 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6082 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6083 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6084 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6085 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6086 } 6087 } 6088 6089 if (pcbddc->coarse_ksp) { 6090 PetscBool ispreonly; 6091 6092 if (CoarseNullSpace) { 6093 PetscBool isnull; 6094 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6095 if (0) { 6096 if (isbddc && !pcbddc->benign_saddle_point) { 6097 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 6098 } else { 6099 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6100 } 6101 } else { 6102 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6103 } 6104 } 6105 /* setup coarse ksp */ 6106 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6107 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6108 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6109 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6110 KSP check_ksp; 6111 KSPType check_ksp_type; 6112 PC check_pc; 6113 Vec check_vec,coarse_vec; 6114 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6115 PetscInt its; 6116 PetscBool compute_eigs; 6117 PetscReal *eigs_r,*eigs_c; 6118 PetscInt neigs; 6119 const char *prefix; 6120 6121 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6122 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6123 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6124 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6125 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6126 /* prevent from setup unneeded object */ 6127 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6128 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6129 if (ispreonly) { 6130 check_ksp_type = KSPPREONLY; 6131 compute_eigs = PETSC_FALSE; 6132 } else { 6133 check_ksp_type = KSPGMRES; 6134 compute_eigs = PETSC_TRUE; 6135 } 6136 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6137 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6138 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6139 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6140 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6141 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6142 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6143 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6144 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6145 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6146 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6147 /* create random vec */ 6148 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6149 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6150 if (CoarseNullSpace) { 6151 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 6152 } 6153 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6154 /* solve coarse problem */ 6155 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6156 if (CoarseNullSpace) { 6157 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 6158 } 6159 /* set eigenvalue estimation if preonly has not been requested */ 6160 if (compute_eigs) { 6161 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6162 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6163 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6164 lambda_max = eigs_r[neigs-1]; 6165 lambda_min = eigs_r[0]; 6166 if (pcbddc->use_coarse_estimates) { 6167 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6168 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6169 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6170 } 6171 } 6172 } 6173 6174 /* check coarse problem residual error */ 6175 if (pcbddc->dbg_flag) { 6176 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6177 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6178 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6179 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6180 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6181 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6182 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6183 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6184 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6185 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6186 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6187 if (CoarseNullSpace) { 6188 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6189 } 6190 if (compute_eigs) { 6191 PetscReal lambda_max_s,lambda_min_s; 6192 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6193 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6194 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6195 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6196 for (i=0;i<neigs;i++) { 6197 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6198 } 6199 } 6200 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6201 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6202 } 6203 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6204 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6205 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6206 if (compute_eigs) { 6207 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6208 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6209 } 6210 } 6211 } 6212 /* print additional info */ 6213 if (pcbddc->dbg_flag) { 6214 /* waits until all processes reaches this point */ 6215 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6218 } 6219 6220 /* free memory */ 6221 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6222 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6223 PetscFunctionReturn(0); 6224 } 6225 6226 #undef __FUNCT__ 6227 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6228 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6229 { 6230 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6231 PC_IS* pcis = (PC_IS*)pc->data; 6232 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6233 IS subset,subset_mult,subset_n; 6234 PetscInt local_size,coarse_size=0; 6235 PetscInt *local_primal_indices=NULL; 6236 const PetscInt *t_local_primal_indices; 6237 PetscErrorCode ierr; 6238 6239 PetscFunctionBegin; 6240 /* Compute global number of coarse dofs */ 6241 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6242 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6243 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6244 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6245 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6246 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6247 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6248 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6249 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6250 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6251 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6252 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6253 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6254 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6255 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6256 6257 /* check numbering */ 6258 if (pcbddc->dbg_flag) { 6259 PetscScalar coarsesum,*array,*array2; 6260 PetscInt i; 6261 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6262 6263 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6266 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6267 /* counter */ 6268 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6269 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6270 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6271 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6272 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6273 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6274 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6275 for (i=0;i<pcbddc->local_primal_size;i++) { 6276 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6277 } 6278 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6279 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6280 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6281 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6282 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6283 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6284 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6285 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6286 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6287 for (i=0;i<pcis->n;i++) { 6288 if (array[i] != 0.0 && array[i] != array2[i]) { 6289 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6290 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6291 set_error = PETSC_TRUE; 6292 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6294 } 6295 } 6296 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6297 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6298 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6299 for (i=0;i<pcis->n;i++) { 6300 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6301 } 6302 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6303 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6304 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6305 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6306 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6308 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6309 PetscInt *gidxs; 6310 6311 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6312 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6316 for (i=0;i<pcbddc->local_primal_size;i++) { 6317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6318 } 6319 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6320 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6321 } 6322 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6323 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6324 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6325 } 6326 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6327 /* get back data */ 6328 *coarse_size_n = coarse_size; 6329 *local_primal_indices_n = local_primal_indices; 6330 PetscFunctionReturn(0); 6331 } 6332 6333 #undef __FUNCT__ 6334 #define __FUNCT__ "PCBDDCGlobalToLocal" 6335 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6336 { 6337 IS localis_t; 6338 PetscInt i,lsize,*idxs,n; 6339 PetscScalar *vals; 6340 PetscErrorCode ierr; 6341 6342 PetscFunctionBegin; 6343 /* get indices in local ordering exploiting local to global map */ 6344 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6345 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6346 for (i=0;i<lsize;i++) vals[i] = 1.0; 6347 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6348 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6349 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6350 if (idxs) { /* multilevel guard */ 6351 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6352 } 6353 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6354 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6355 ierr = PetscFree(vals);CHKERRQ(ierr); 6356 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6357 /* now compute set in local ordering */ 6358 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6359 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6360 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6361 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6362 for (i=0,lsize=0;i<n;i++) { 6363 if (PetscRealPart(vals[i]) > 0.5) { 6364 lsize++; 6365 } 6366 } 6367 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6368 for (i=0,lsize=0;i<n;i++) { 6369 if (PetscRealPart(vals[i]) > 0.5) { 6370 idxs[lsize++] = i; 6371 } 6372 } 6373 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6374 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6375 *localis = localis_t; 6376 PetscFunctionReturn(0); 6377 } 6378 6379 #undef __FUNCT__ 6380 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6381 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6382 { 6383 PC_IS *pcis=(PC_IS*)pc->data; 6384 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6385 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6386 Mat S_j; 6387 PetscInt *used_xadj,*used_adjncy; 6388 PetscBool free_used_adj; 6389 PetscErrorCode ierr; 6390 6391 PetscFunctionBegin; 6392 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6393 free_used_adj = PETSC_FALSE; 6394 if (pcbddc->sub_schurs_layers == -1) { 6395 used_xadj = NULL; 6396 used_adjncy = NULL; 6397 } else { 6398 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6399 used_xadj = pcbddc->mat_graph->xadj; 6400 used_adjncy = pcbddc->mat_graph->adjncy; 6401 } else if (pcbddc->computed_rowadj) { 6402 used_xadj = pcbddc->mat_graph->xadj; 6403 used_adjncy = pcbddc->mat_graph->adjncy; 6404 } else { 6405 PetscBool flg_row=PETSC_FALSE; 6406 const PetscInt *xadj,*adjncy; 6407 PetscInt nvtxs; 6408 6409 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6410 if (flg_row) { 6411 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6412 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6413 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6414 free_used_adj = PETSC_TRUE; 6415 } else { 6416 pcbddc->sub_schurs_layers = -1; 6417 used_xadj = NULL; 6418 used_adjncy = NULL; 6419 } 6420 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6421 } 6422 } 6423 6424 /* setup sub_schurs data */ 6425 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6426 if (!sub_schurs->schur_explicit) { 6427 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6428 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6429 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6430 } else { 6431 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6432 PetscBool isseqaij,need_change = PETSC_FALSE;; 6433 PetscInt benign_n; 6434 Mat change = NULL; 6435 Vec scaling = NULL; 6436 IS change_primal = NULL; 6437 6438 if (!pcbddc->use_vertices && reuse_solvers) { 6439 PetscInt n_vertices; 6440 6441 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6442 reuse_solvers = (PetscBool)!n_vertices; 6443 } 6444 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6445 if (!isseqaij) { 6446 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6447 if (matis->A == pcbddc->local_mat) { 6448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6449 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6450 } else { 6451 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6452 } 6453 } 6454 if (!pcbddc->benign_change_explicit) { 6455 benign_n = pcbddc->benign_n; 6456 } else { 6457 benign_n = 0; 6458 } 6459 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6460 We need a global reduction to avoid possible deadlocks. 6461 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6462 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6463 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6464 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6465 need_change = (PetscBool)(!need_change); 6466 } 6467 /* If the user defines additional constraints, we import them here. 6468 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6469 if (need_change) { 6470 PC_IS *pcisf; 6471 PC_BDDC *pcbddcf; 6472 PC pcf; 6473 6474 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6475 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6476 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6477 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6478 /* hacks */ 6479 pcisf = (PC_IS*)pcf->data; 6480 pcisf->is_B_local = pcis->is_B_local; 6481 pcisf->vec1_N = pcis->vec1_N; 6482 pcisf->BtoNmap = pcis->BtoNmap; 6483 pcisf->n = pcis->n; 6484 pcisf->n_B = pcis->n_B; 6485 pcbddcf = (PC_BDDC*)pcf->data; 6486 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6487 pcbddcf->mat_graph = pcbddc->mat_graph; 6488 pcbddcf->use_faces = PETSC_TRUE; 6489 pcbddcf->use_change_of_basis = PETSC_TRUE; 6490 pcbddcf->use_change_on_faces = PETSC_TRUE; 6491 pcbddcf->use_qr_single = PETSC_TRUE; 6492 pcbddcf->fake_change = PETSC_TRUE; 6493 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6494 /* store information on primal vertices and change of basis (in local numbering) */ 6495 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6496 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6497 change = pcbddcf->ConstraintMatrix; 6498 pcbddcf->ConstraintMatrix = NULL; 6499 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6500 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6501 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6502 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6503 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6504 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6505 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6506 pcf->ops->destroy = NULL; 6507 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6508 } 6509 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6510 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6511 ierr = MatDestroy(&change);CHKERRQ(ierr); 6512 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6513 } 6514 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6515 6516 /* free adjacency */ 6517 if (free_used_adj) { 6518 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6519 } 6520 PetscFunctionReturn(0); 6521 } 6522 6523 #undef __FUNCT__ 6524 #define __FUNCT__ "PCBDDCInitSubSchurs" 6525 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6526 { 6527 PC_IS *pcis=(PC_IS*)pc->data; 6528 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6529 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6530 PCBDDCGraph graph; 6531 PetscErrorCode ierr; 6532 6533 PetscFunctionBegin; 6534 /* attach interface graph for determining subsets */ 6535 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6536 IS verticesIS,verticescomm; 6537 PetscInt vsize,*idxs; 6538 6539 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6540 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6541 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6543 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6544 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6545 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6546 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6547 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6548 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6549 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6550 } else { 6551 graph = pcbddc->mat_graph; 6552 } 6553 /* print some info */ 6554 if (pcbddc->dbg_flag) { 6555 IS vertices; 6556 PetscInt nv,nedges,nfaces; 6557 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6558 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6559 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6560 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6561 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6562 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6567 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6568 } 6569 6570 /* sub_schurs init */ 6571 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6572 6573 /* free graph struct */ 6574 if (pcbddc->sub_schurs_rebuild) { 6575 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6576 } 6577 PetscFunctionReturn(0); 6578 } 6579 6580 #undef __FUNCT__ 6581 #define __FUNCT__ "PCBDDCCheckOperator" 6582 PetscErrorCode PCBDDCCheckOperator(PC pc) 6583 { 6584 PC_IS *pcis=(PC_IS*)pc->data; 6585 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6586 PetscErrorCode ierr; 6587 6588 PetscFunctionBegin; 6589 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6590 IS zerodiag = NULL; 6591 Mat S_j,B0_B=NULL; 6592 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6593 PetscScalar *p0_check,*array,*array2; 6594 PetscReal norm; 6595 PetscInt i; 6596 6597 /* B0 and B0_B */ 6598 if (zerodiag) { 6599 IS dummy; 6600 6601 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6602 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6603 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6604 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6605 } 6606 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6607 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6608 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6609 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6610 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6611 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6612 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6613 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6614 /* S_j */ 6615 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6616 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6617 6618 /* mimic vector in \widetilde{W}_\Gamma */ 6619 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6620 /* continuous in primal space */ 6621 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6622 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6623 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6624 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6625 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6626 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6627 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6628 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6629 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6630 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6631 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6632 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6633 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6634 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6635 6636 /* assemble rhs for coarse problem */ 6637 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6638 /* local with Schur */ 6639 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6640 if (zerodiag) { 6641 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6642 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6643 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6644 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6645 } 6646 /* sum on primal nodes the local contributions */ 6647 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6648 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6649 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6650 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6651 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6652 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6653 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6654 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6655 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6656 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6657 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6658 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6659 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6660 /* scale primal nodes (BDDC sums contibutions) */ 6661 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6662 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6663 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6664 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6665 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6666 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6667 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6668 /* global: \widetilde{B0}_B w_\Gamma */ 6669 if (zerodiag) { 6670 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6671 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6672 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6673 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6674 } 6675 /* BDDC */ 6676 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6677 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6678 6679 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6680 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6681 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6682 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6683 for (i=0;i<pcbddc->benign_n;i++) { 6684 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6685 } 6686 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6687 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6688 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6689 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6690 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6691 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6692 } 6693 PetscFunctionReturn(0); 6694 } 6695