1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 9 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 10 { 11 PetscErrorCode ierr; 12 Vec local,global; 13 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 14 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 15 16 PetscFunctionBegin; 17 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 18 /* need to convert from global to local topology information and remove references to information in global ordering */ 19 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 20 if (pcbddc->user_provided_isfordofs) { 21 if (pcbddc->n_ISForDofs) { 22 PetscInt i; 23 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 24 for (i=0;i<pcbddc->n_ISForDofs;i++) { 25 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 26 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 27 } 28 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 29 pcbddc->n_ISForDofs = 0; 30 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 31 } 32 } else { 33 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 34 PetscInt i, n = matis->A->rmap->n; 35 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 36 if (i > 1) { 37 pcbddc->n_ISForDofsLocal = i; 38 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 39 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 40 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 41 } 42 } 43 } 44 } 45 46 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 47 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 48 } 49 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 50 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 51 } 52 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 53 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 54 } 55 ierr = VecDestroy(&global);CHKERRQ(ierr); 56 ierr = VecDestroy(&local);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 62 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 63 { 64 PC_IS *pcis = (PC_IS*)(pc->data); 65 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 if (!pcbddc->benign_have_null) { 70 PetscFunctionReturn(0); 71 } 72 if (pcbddc->ChangeOfBasisMatrix) { 73 Vec swap; 74 75 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 76 swap = pcbddc->work_change; 77 pcbddc->work_change = r; 78 r = swap; 79 } 80 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82 if (pcbddc->benign_n) { 83 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 84 } else { 85 ierr = VecSet(pcis->vec2_D,0.);CHKERRQ(ierr); 86 } 87 ierr = VecSet(z,0.);CHKERRQ(ierr); 88 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 89 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 90 if (pcbddc->ChangeOfBasisMatrix) { 91 Vec swap; 92 93 swap = r; 94 r = pcbddc->work_change; 95 pcbddc->work_change = swap; 96 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 97 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 104 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 105 { 106 PCBDDCBenignMatMult_ctx ctx; 107 PetscErrorCode ierr; 108 PetscBool apply_right,apply_left,reset_x; 109 110 PetscFunctionBegin; 111 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 112 if (transpose) { 113 apply_right = ctx->apply_left; 114 apply_left = ctx->apply_right; 115 } else { 116 apply_right = ctx->apply_right; 117 apply_left = ctx->apply_left; 118 } 119 reset_x = PETSC_FALSE; 120 if (apply_right) { 121 const PetscScalar *ax; 122 PetscInt nl,i; 123 124 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 125 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 126 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 128 for (i=0;i<ctx->benign_n;i++) { 129 PetscScalar sum,val; 130 const PetscInt *idxs; 131 PetscInt nz,j; 132 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 133 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 134 sum = 0.; 135 if (ctx->apply_p0) { 136 val = ctx->work[idxs[nz-1]]; 137 for (j=0;j<nz-1;j++) { 138 sum += ctx->work[idxs[j]]; 139 ctx->work[idxs[j]] += val; 140 } 141 } else { 142 for (j=0;j<nz-1;j++) { 143 sum += ctx->work[idxs[j]]; 144 } 145 } 146 ctx->work[idxs[nz-1]] -= sum; 147 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 148 } 149 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 150 reset_x = PETSC_TRUE; 151 } 152 if (transpose) { 153 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 154 } else { 155 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 156 } 157 if (reset_x) { 158 ierr = VecResetArray(x);CHKERRQ(ierr); 159 } 160 if (apply_left) { 161 PetscScalar *ay; 162 PetscInt i; 163 164 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 165 for (i=0;i<ctx->benign_n;i++) { 166 PetscScalar sum,val; 167 const PetscInt *idxs; 168 PetscInt nz,j; 169 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 170 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 171 val = -ay[idxs[nz-1]]; 172 if (ctx->apply_p0) { 173 sum = 0.; 174 for (j=0;j<nz-1;j++) { 175 sum += ay[idxs[j]]; 176 ay[idxs[j]] += val; 177 } 178 ay[idxs[nz-1]] += sum; 179 } else { 180 for (j=0;j<nz-1;j++) { 181 ay[idxs[j]] += val; 182 } 183 ay[idxs[nz-1]] = 0.; 184 } 185 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 186 } 187 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 194 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 195 { 196 PetscErrorCode ierr; 197 198 PetscFunctionBegin; 199 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 205 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 206 { 207 PetscErrorCode ierr; 208 209 PetscFunctionBegin; 210 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "PCBDDCBenignShellMat" 216 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 217 { 218 PC_IS *pcis = (PC_IS*)pc->data; 219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 220 PCBDDCBenignMatMult_ctx ctx; 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 if (!restore) { 225 Mat A_IB,A_BI; 226 PetscScalar *work; 227 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 228 229 if (pcbddc->benign_original_mat) { 230 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 231 } 232 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 233 PetscFunctionReturn(0); 234 } 235 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 237 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 238 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 239 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 240 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 241 ierr = PetscNew(&ctx);CHKERRQ(ierr); 242 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 243 ctx->apply_left = PETSC_TRUE; 244 ctx->apply_right = PETSC_FALSE; 245 ctx->apply_p0 = PETSC_FALSE; 246 ctx->benign_n = pcbddc->benign_n; 247 if (reuse) { 248 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 249 ctx->free = PETSC_FALSE; 250 } else { /* TODO: could be optimized for successive solves */ 251 ISLocalToGlobalMapping N_to_D; 252 PetscInt i; 253 254 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 255 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 256 for (i=0;i<pcbddc->benign_n;i++) { 257 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 258 } 259 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 260 ctx->free = PETSC_TRUE; 261 } 262 ctx->A = pcis->A_IB; 263 ctx->work = work; 264 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 265 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 266 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 267 pcis->A_IB = A_IB; 268 269 /* A_BI as A_IB^T */ 270 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 271 pcbddc->benign_original_mat = pcis->A_BI; 272 pcis->A_BI = A_BI; 273 } else { 274 if (!pcbddc->benign_original_mat) { 275 PetscFunctionReturn(0); 276 } 277 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 278 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 279 pcis->A_IB = ctx->A; 280 ctx->A = NULL; 281 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 282 pcis->A_BI = pcbddc->benign_original_mat; 283 pcbddc->benign_original_mat = NULL; 284 if (ctx->free) { 285 PetscInt i; 286 for (i=0;i<ctx->benign_n;i++) { 287 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 288 } 289 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 290 } 291 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 292 ierr = PetscFree(ctx);CHKERRQ(ierr); 293 } 294 PetscFunctionReturn(0); 295 } 296 297 /* used just in bddc debug mode */ 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCBenignProject" 300 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 301 { 302 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 303 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 304 Mat An; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 309 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 310 if (is1) { 311 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 312 ierr = MatDestroy(&An);CHKERRQ(ierr); 313 } else { 314 *B = An; 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /* TODO: add reuse flag */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSeqAIJCompress" 322 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 323 { 324 Mat Bt; 325 PetscScalar *a,*bdata; 326 const PetscInt *ii,*ij; 327 PetscInt m,n,i,nnz,*bii,*bij; 328 PetscBool flg_row; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 333 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 335 nnz = n; 336 for (i=0;i<ii[n];i++) { 337 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 338 } 339 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 340 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 341 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 342 nnz = 0; 343 bii[0] = 0; 344 for (i=0;i<n;i++) { 345 PetscInt j; 346 for (j=ii[i];j<ii[i+1];j++) { 347 PetscScalar entry = a[j]; 348 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 349 bij[nnz] = ij[j]; 350 bdata[nnz] = entry; 351 nnz++; 352 } 353 } 354 bii[i+1] = nnz; 355 } 356 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 357 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 358 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 359 { 360 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 361 b->free_a = PETSC_TRUE; 362 b->free_ij = PETSC_TRUE; 363 } 364 *B = Bt; 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatDetectDisconnectedComponents" 370 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 371 { 372 Mat B; 373 IS is_dummy,*cc_n; 374 ISLocalToGlobalMapping l2gmap_dummy; 375 PCBDDCGraph graph; 376 PetscInt i,n; 377 PetscInt *xadj,*adjncy; 378 PetscInt *xadj_filtered,*adjncy_filtered; 379 PetscBool flg_row,isseqaij; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 if (!A->rmap->N || !A->cmap->N) { 384 *ncc = 0; 385 *cc = NULL; 386 PetscFunctionReturn(0); 387 } 388 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 389 if (!isseqaij && filter) { 390 PetscBool isseqdense; 391 392 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 393 if (!isseqdense) { 394 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 395 } else { /* TODO: rectangular case and LDA */ 396 PetscScalar *array; 397 PetscReal chop=1.e-6; 398 399 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 400 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 401 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 402 for (i=0;i<n;i++) { 403 PetscInt j; 404 for (j=i+1;j<n;j++) { 405 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 406 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 407 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 408 } 409 } 410 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 411 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 412 } 413 } else { 414 B = A; 415 } 416 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 417 418 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 419 if (filter) { 420 PetscScalar *data; 421 PetscInt j,cum; 422 423 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 424 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 425 cum = 0; 426 for (i=0;i<n;i++) { 427 PetscInt t; 428 429 for (j=xadj[i];j<xadj[i+1];j++) { 430 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 431 continue; 432 } 433 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 434 } 435 t = xadj_filtered[i]; 436 xadj_filtered[i] = cum; 437 cum += t; 438 } 439 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 440 } else { 441 xadj_filtered = NULL; 442 adjncy_filtered = NULL; 443 } 444 445 /* compute local connected components using PCBDDCGraph */ 446 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 447 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 448 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 449 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 450 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 451 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 452 if (xadj_filtered) { 453 graph->xadj = xadj_filtered; 454 graph->adjncy = adjncy_filtered; 455 } else { 456 graph->xadj = xadj; 457 graph->adjncy = adjncy; 458 } 459 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 460 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 461 /* partial clean up */ 462 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 463 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 464 if (A != B) { 465 ierr = MatDestroy(&B);CHKERRQ(ierr); 466 } 467 468 /* get back data */ 469 if (ncc) *ncc = graph->ncc; 470 if (cc) { 471 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 472 for (i=0;i<graph->ncc;i++) { 473 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 474 } 475 *cc = cc_n; 476 } 477 /* clean up graph */ 478 graph->xadj = 0; 479 graph->adjncy = 0; 480 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCBenignCheck" 486 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 487 { 488 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 489 PC_IS* pcis = (PC_IS*)(pc->data); 490 IS dirIS = NULL; 491 PetscInt i; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 496 if (zerodiag) { 497 Mat A; 498 Vec vec3_N; 499 PetscScalar *vals; 500 const PetscInt *idxs; 501 PetscInt nz,*count; 502 503 /* p0 */ 504 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 505 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 507 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 508 for (i=0;i<nz;i++) vals[i] = 1.; 509 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 512 /* v_I */ 513 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 514 for (i=0;i<nz;i++) vals[i] = 0.; 515 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 517 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 518 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 519 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 520 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 521 if (dirIS) { 522 PetscInt n; 523 524 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 525 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 526 for (i=0;i<n;i++) vals[i] = 0.; 527 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 528 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 532 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 533 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 534 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 535 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 536 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 537 if (PetscAbsScalar(vals[0]) > 1.e-1) { 538 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 539 } 540 ierr = PetscFree(vals);CHKERRQ(ierr); 541 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 542 543 /* there should not be any pressure dofs lying on the interface */ 544 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 545 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 546 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 547 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 548 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 549 for (i=0;i<nz;i++) { 550 if (count[idxs[i]]) { 551 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 552 } 553 } 554 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 555 ierr = PetscFree(count);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 558 559 /* check PCBDDCBenignGetOrSetP0 */ 560 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 561 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 562 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 563 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 564 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 565 for (i=0;i<pcbddc->benign_n;i++) { 566 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 567 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 575 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 576 { 577 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 578 IS pressures,zerodiag,*zerodiag_subs; 579 PetscInt nz,n; 580 PetscInt *interior_dofs,n_interior_dofs; 581 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 582 PetscErrorCode ierr; 583 584 PetscFunctionBegin; 585 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 586 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 587 for (n=0;n<pcbddc->benign_n;n++) { 588 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 589 } 590 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 591 pcbddc->benign_n = 0; 592 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 593 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 594 Checks if all the pressure dofs in each subdomain have a zero diagonal 595 If not, a change of basis on pressures is not needed 596 since the local Schur complements are SPD 597 */ 598 has_null_pressures = PETSC_TRUE; 599 have_null = PETSC_TRUE; 600 if (pcbddc->n_ISForDofsLocal) { 601 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 602 603 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 604 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 605 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 607 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 608 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 609 if (!sorted) { 610 ierr = ISSort(pressures);CHKERRQ(ierr); 611 } 612 } else { 613 pressures = NULL; 614 } 615 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 616 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 617 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 618 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 619 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 620 if (!sorted) { 621 ierr = ISSort(zerodiag);CHKERRQ(ierr); 622 } 623 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 624 if (!nz) { 625 if (n) have_null = PETSC_FALSE; 626 has_null_pressures = PETSC_FALSE; 627 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 628 } 629 recompute_zerodiag = PETSC_FALSE; 630 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 631 zerodiag_subs = NULL; 632 pcbddc->benign_n = 0; 633 n_interior_dofs = 0; 634 interior_dofs = NULL; 635 if (pcbddc->current_level) { /* need to compute interior nodes */ 636 PetscInt n,i,j; 637 PetscInt n_neigh,*neigh,*n_shared,**shared; 638 PetscInt *iwork; 639 640 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 641 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 642 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 643 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 644 for (i=0;i<n_neigh;i++) 645 for (j=0;j<n_shared[i];j++) 646 iwork[shared[i][j]] += 1; 647 for (i=0;i<n;i++) 648 if (!iwork[i]) 649 interior_dofs[n_interior_dofs++] = i; 650 ierr = PetscFree(iwork);CHKERRQ(ierr); 651 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 652 } 653 if (has_null_pressures) { 654 IS *subs; 655 PetscInt nsubs,i,j,nl; 656 const PetscInt *idxs; 657 PetscScalar *array; 658 Vec *work; 659 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 660 661 subs = pcbddc->local_subs; 662 nsubs = pcbddc->n_local_subs; 663 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 664 if (pcbddc->current_level) { 665 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 666 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 667 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 668 /* work[0] = 1_p */ 669 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 670 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 671 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 672 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 673 /* work[0] = 1_v */ 674 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 675 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 676 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 677 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 679 } 680 if (nsubs > 1) { 681 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 682 for (i=0;i<nsubs;i++) { 683 ISLocalToGlobalMapping l2g; 684 IS t_zerodiag_subs; 685 PetscInt nl; 686 687 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 688 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 689 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 690 if (nl) { 691 PetscBool valid = PETSC_TRUE; 692 693 if (pcbddc->current_level) { 694 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 695 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 696 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 697 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 698 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 699 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 701 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 702 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 703 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 704 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 705 for (j=0;j<n_interior_dofs;j++) { 706 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 707 valid = PETSC_FALSE; 708 break; 709 } 710 } 711 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 712 } 713 if (valid && pcbddc->NeumannBoundariesLocal) { 714 IS t_bc; 715 PetscInt nzb; 716 717 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 718 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 719 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 720 if (nzb) valid = PETSC_FALSE; 721 } 722 if (valid && pressures) { 723 IS t_pressure_subs; 724 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 725 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 726 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 727 } 728 if (valid) { 729 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 730 pcbddc->benign_n++; 731 } else { 732 recompute_zerodiag = PETSC_TRUE; 733 } 734 } 735 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 736 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 737 } 738 } else { /* there's just one subdomain (or zero if they have not been detected */ 739 PetscBool valid = PETSC_TRUE; 740 741 if (pcbddc->NeumannBoundariesLocal) { 742 PetscInt nzb; 743 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 744 if (nzb) valid = PETSC_FALSE; 745 } 746 if (valid && pressures) { 747 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 748 } 749 if (valid && pcbddc->current_level) { 750 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 751 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 752 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 753 for (j=0;j<n_interior_dofs;j++) { 754 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 755 valid = PETSC_FALSE; 756 break; 757 } 758 } 759 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 760 } 761 if (valid) { 762 pcbddc->benign_n = 1; 763 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 764 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 765 zerodiag_subs[0] = zerodiag; 766 } 767 } 768 if (pcbddc->current_level) { 769 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 770 } 771 } 772 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 773 774 if (!pcbddc->benign_n) { 775 PetscInt n; 776 777 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 778 recompute_zerodiag = PETSC_FALSE; 779 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 780 if (n) { 781 has_null_pressures = PETSC_FALSE; 782 have_null = PETSC_FALSE; 783 } 784 } 785 786 /* final check for null pressures */ 787 if (zerodiag && pressures) { 788 PetscInt nz,np; 789 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 790 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 791 if (nz != np) have_null = PETSC_FALSE; 792 } 793 794 if (recompute_zerodiag) { 795 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 796 if (pcbddc->benign_n == 1) { 797 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 798 zerodiag = zerodiag_subs[0]; 799 } else { 800 PetscInt i,nzn,*new_idxs; 801 802 nzn = 0; 803 for (i=0;i<pcbddc->benign_n;i++) { 804 PetscInt ns; 805 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 806 nzn += ns; 807 } 808 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 809 nzn = 0; 810 for (i=0;i<pcbddc->benign_n;i++) { 811 PetscInt ns,*idxs; 812 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 813 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 814 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 815 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 816 nzn += ns; 817 } 818 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 819 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 820 } 821 have_null = PETSC_FALSE; 822 } 823 824 /* no-net-flux */ 825 if (pcbddc->benign_compute_nonetflux) { 826 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 827 MatNullSpace near_null_space; 828 ISLocalToGlobalMapping rmap; 829 Vec quad_vec; 830 PetscScalar *pvals; 831 PetscInt i,np,*dummyins; 832 IS isused = NULL; 833 PetscBool participate = PETSC_TRUE; 834 835 /* create vector to hold quadrature weights */ 836 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 837 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 838 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 839 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 840 841 /* compute B^{(i)T} * 1_p */ 842 np = 0; 843 if (pressures) { 844 isused = pressures; 845 } else { 846 isused = zerodiag; 847 } 848 if (isused) { 849 ierr = ISGetLocalSize(isused,&np);CHKERRQ(ierr); 850 } 851 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 852 for (i=0;i<np;i++) pvals[i] = 1.; 853 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 854 if (np) { 855 const PetscInt *pidxs; 856 857 if (isused) { 858 ierr = ISGetIndices(isused,&pidxs);CHKERRQ(ierr); 859 } 860 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 861 if (isused) { 862 ierr = ISRestoreIndices(isused,&pidxs);CHKERRQ(ierr); 863 } 864 } 865 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 866 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 867 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 868 ierr = PetscFree(pvals);CHKERRQ(ierr); 869 if (!isused) participate = PETSC_FALSE; 870 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 871 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 872 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 873 for (i=0;i<matis->sf->nleaves;i++) 874 if (participate) matis->sf_leafdata[i] = PetscGlobalRank; 875 else matis->sf_leafdata[i] = -1; 876 877 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 878 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 879 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 880 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 881 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 882 for (i=0;i<matis->sf->nleaves;i++) { 883 if (PetscGlobalRank != matis->sf_leafdata[i]) { 884 pvals[i] = 0.; 885 } 886 } 887 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 888 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 889 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 890 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 891 ierr = PetscFree(dummyins);CHKERRQ(ierr); 892 893 /* assembly quadrature vec and attach near null space to pmat */ 894 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 895 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 896 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 897 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 898 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 899 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 900 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 901 } 902 903 /* change of basis and p0 dofs */ 904 if (has_null_pressures) { 905 IS zerodiagc; 906 const PetscInt *idxs,*idxsc; 907 PetscInt i,s,*nnz; 908 909 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 910 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 911 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 912 /* local change of basis for pressures */ 913 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 914 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 915 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 916 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 917 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 918 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 919 for (i=0;i<pcbddc->benign_n;i++) { 920 PetscInt nzs,j; 921 922 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 923 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 924 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 925 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 926 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 927 } 928 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 929 ierr = PetscFree(nnz);CHKERRQ(ierr); 930 /* set identity on velocities */ 931 for (i=0;i<n-nz;i++) { 932 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 933 } 934 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 935 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 936 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 937 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 938 /* set change on pressures */ 939 for (s=0;s<pcbddc->benign_n;s++) { 940 PetscScalar *array; 941 PetscInt nzs; 942 943 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 944 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 945 for (i=0;i<nzs-1;i++) { 946 PetscScalar vals[2]; 947 PetscInt cols[2]; 948 949 cols[0] = idxs[i]; 950 cols[1] = idxs[nzs-1]; 951 vals[0] = 1.; 952 vals[1] = 1.; 953 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 954 } 955 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 956 for (i=0;i<nzs-1;i++) array[i] = -1.; 957 array[nzs-1] = 1.; 958 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 959 /* store local idxs for p0 */ 960 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 961 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 962 ierr = PetscFree(array);CHKERRQ(ierr); 963 } 964 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 /* project if needed */ 967 if (pcbddc->benign_change_explicit) { 968 Mat M; 969 970 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 971 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 972 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 973 ierr = MatDestroy(&M);CHKERRQ(ierr); 974 } 975 /* store global idxs for p0 */ 976 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 977 } 978 pcbddc->benign_zerodiag_subs = zerodiag_subs; 979 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 980 981 /* determines if the coarse solver will be singular or not */ 982 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 983 /* determines if the problem has subdomains with 0 pressure block */ 984 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 985 *zerodiaglocal = zerodiag; 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 991 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 992 { 993 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 994 PetscScalar *array; 995 PetscErrorCode ierr; 996 997 PetscFunctionBegin; 998 if (!pcbddc->benign_sf) { 999 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1000 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1001 } 1002 if (get) { 1003 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1004 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1005 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1007 } else { 1008 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1009 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1010 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1018 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1019 { 1020 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 /* TODO: add error checking 1025 - avoid nested pop (or push) calls. 1026 - cannot push before pop. 1027 - cannot call this if pcbddc->local_mat is NULL 1028 */ 1029 if (!pcbddc->benign_n) { 1030 PetscFunctionReturn(0); 1031 } 1032 if (pop) { 1033 if (pcbddc->benign_change_explicit) { 1034 IS is_p0; 1035 MatReuse reuse; 1036 1037 /* extract B_0 */ 1038 reuse = MAT_INITIAL_MATRIX; 1039 if (pcbddc->benign_B0) { 1040 reuse = MAT_REUSE_MATRIX; 1041 } 1042 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1043 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1044 /* remove rows and cols from local problem */ 1045 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1046 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1047 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1048 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1049 } else { 1050 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1051 PetscScalar *vals; 1052 PetscInt i,n,*idxs_ins; 1053 1054 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1055 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1056 if (!pcbddc->benign_B0) { 1057 PetscInt *nnz; 1058 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1059 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1060 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1061 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1062 for (i=0;i<pcbddc->benign_n;i++) { 1063 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1064 nnz[i] = n - nnz[i]; 1065 } 1066 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1067 ierr = PetscFree(nnz);CHKERRQ(ierr); 1068 } 1069 1070 for (i=0;i<pcbddc->benign_n;i++) { 1071 PetscScalar *array; 1072 PetscInt *idxs,j,nz,cum; 1073 1074 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1075 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1076 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1077 for (j=0;j<nz;j++) vals[j] = 1.; 1078 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1079 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1080 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1081 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1082 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1083 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1084 cum = 0; 1085 for (j=0;j<n;j++) { 1086 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1087 vals[cum] = array[j]; 1088 idxs_ins[cum] = j; 1089 cum++; 1090 } 1091 } 1092 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1093 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1094 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1095 } 1096 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1097 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1098 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1099 } 1100 } else { /* push */ 1101 if (pcbddc->benign_change_explicit) { 1102 PetscInt i; 1103 1104 for (i=0;i<pcbddc->benign_n;i++) { 1105 PetscScalar *B0_vals; 1106 PetscInt *B0_cols,B0_ncol; 1107 1108 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1109 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1110 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1111 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1112 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1113 } 1114 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116 } else { 1117 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1125 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1126 { 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1129 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1130 PetscBLASInt *B_iwork,*B_ifail; 1131 PetscScalar *work,lwork; 1132 PetscScalar *St,*S,*eigv; 1133 PetscScalar *Sarray,*Starray; 1134 PetscReal *eigs,thresh; 1135 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1136 PetscBool allocated_S_St; 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscReal *rwork; 1139 #endif 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1144 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1145 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1146 1147 if (pcbddc->dbg_flag) { 1148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1151 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1152 } 1153 1154 if (pcbddc->dbg_flag) { 1155 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1156 } 1157 1158 /* max size of subsets */ 1159 mss = 0; 1160 for (i=0;i<sub_schurs->n_subs;i++) { 1161 PetscInt subset_size; 1162 1163 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1164 mss = PetscMax(mss,subset_size); 1165 } 1166 1167 /* min/max and threshold */ 1168 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1169 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1170 nmax = PetscMax(nmin,nmax); 1171 allocated_S_St = PETSC_FALSE; 1172 if (nmin) { 1173 allocated_S_St = PETSC_TRUE; 1174 } 1175 1176 /* allocate lapack workspace */ 1177 cum = cum2 = 0; 1178 maxneigs = 0; 1179 for (i=0;i<sub_schurs->n_subs;i++) { 1180 PetscInt n,subset_size; 1181 1182 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1183 n = PetscMin(subset_size,nmax); 1184 cum += subset_size; 1185 cum2 += subset_size*n; 1186 maxneigs = PetscMax(maxneigs,n); 1187 } 1188 if (mss) { 1189 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1190 PetscBLASInt B_itype = 1; 1191 PetscBLASInt B_N = mss; 1192 PetscReal zero = 0.0; 1193 PetscReal eps = 0.0; /* dlamch? */ 1194 1195 B_lwork = -1; 1196 S = NULL; 1197 St = NULL; 1198 eigs = NULL; 1199 eigv = NULL; 1200 B_iwork = NULL; 1201 B_ifail = NULL; 1202 #if defined(PETSC_USE_COMPLEX) 1203 rwork = NULL; 1204 #endif 1205 thresh = 1.0; 1206 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1207 #if defined(PETSC_USE_COMPLEX) 1208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1209 #else 1210 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1211 #endif 1212 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1214 } else { 1215 /* TODO */ 1216 } 1217 } else { 1218 lwork = 0; 1219 } 1220 1221 nv = 0; 1222 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1223 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1224 } 1225 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1226 if (allocated_S_St) { 1227 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1228 } 1229 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1230 #if defined(PETSC_USE_COMPLEX) 1231 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1234 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1235 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1236 nv+cum,&pcbddc->adaptive_constraints_idxs, 1237 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1238 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1239 1240 maxneigs = 0; 1241 cum = cumarray = 0; 1242 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1243 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1244 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1245 const PetscInt *idxs; 1246 1247 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1248 for (cum=0;cum<nv;cum++) { 1249 pcbddc->adaptive_constraints_n[cum] = 1; 1250 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1251 pcbddc->adaptive_constraints_data[cum] = 1.0; 1252 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1253 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1254 } 1255 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1256 } 1257 1258 if (mss) { /* multilevel */ 1259 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1260 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1261 } 1262 1263 thresh = pcbddc->adaptive_threshold; 1264 for (i=0;i<sub_schurs->n_subs;i++) { 1265 const PetscInt *idxs; 1266 PetscReal upper,lower; 1267 PetscInt j,subset_size,eigs_start = 0; 1268 PetscBLASInt B_N; 1269 PetscBool same_data = PETSC_FALSE; 1270 1271 if (pcbddc->use_deluxe_scaling) { 1272 upper = PETSC_MAX_REAL; 1273 lower = thresh; 1274 } else { 1275 upper = 1./thresh; 1276 lower = 0.; 1277 } 1278 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1279 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1280 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1281 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1282 if (sub_schurs->is_hermitian) { 1283 PetscInt j,k; 1284 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1285 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1286 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1287 } 1288 for (j=0;j<subset_size;j++) { 1289 for (k=j;k<subset_size;k++) { 1290 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1291 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1292 } 1293 } 1294 } else { 1295 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1296 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1297 } 1298 } else { 1299 S = Sarray + cumarray; 1300 St = Starray + cumarray; 1301 } 1302 /* see if we can save some work */ 1303 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1304 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1305 } 1306 1307 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1308 B_neigs = 0; 1309 } else { 1310 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1311 PetscBLASInt B_itype = 1; 1312 PetscBLASInt B_IL, B_IU; 1313 PetscReal eps = -1.0; /* dlamch? */ 1314 PetscInt nmin_s; 1315 PetscBool compute_range = PETSC_FALSE; 1316 1317 if (pcbddc->dbg_flag) { 1318 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1319 } 1320 1321 compute_range = PETSC_FALSE; 1322 if (thresh > 1.+PETSC_SMALL && !same_data) { 1323 compute_range = PETSC_TRUE; 1324 } 1325 1326 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1327 if (compute_range) { 1328 1329 /* ask for eigenvalues larger than thresh */ 1330 #if defined(PETSC_USE_COMPLEX) 1331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1332 #else 1333 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1334 #endif 1335 } else if (!same_data) { 1336 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1337 B_IL = 1; 1338 #if defined(PETSC_USE_COMPLEX) 1339 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1340 #else 1341 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1342 #endif 1343 } else { /* same_data is true, so get the adaptive function requested by the user */ 1344 PetscInt k; 1345 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1346 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1347 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1348 nmin = nmax; 1349 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1350 for (k=0;k<nmax;k++) { 1351 eigs[k] = 1./PETSC_SMALL; 1352 eigv[k*(subset_size+1)] = 1.0; 1353 } 1354 } 1355 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1356 if (B_ierr) { 1357 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1358 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1359 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1360 } 1361 1362 if (B_neigs > nmax) { 1363 if (pcbddc->dbg_flag) { 1364 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1365 } 1366 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1367 B_neigs = nmax; 1368 } 1369 1370 nmin_s = PetscMin(nmin,B_N); 1371 if (B_neigs < nmin_s) { 1372 PetscBLASInt B_neigs2; 1373 1374 if (pcbddc->use_deluxe_scaling) { 1375 B_IL = B_N - nmin_s + 1; 1376 B_IU = B_N - B_neigs; 1377 } else { 1378 B_IL = B_neigs + 1; 1379 B_IU = nmin_s; 1380 } 1381 if (pcbddc->dbg_flag) { 1382 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1383 } 1384 if (sub_schurs->is_hermitian) { 1385 PetscInt j; 1386 for (j=0;j<subset_size;j++) { 1387 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1388 } 1389 for (j=0;j<subset_size;j++) { 1390 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1391 } 1392 } else { 1393 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1395 } 1396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1397 #if defined(PETSC_USE_COMPLEX) 1398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1399 #else 1400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1401 #endif 1402 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1403 B_neigs += B_neigs2; 1404 } 1405 if (B_ierr) { 1406 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1407 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1408 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1409 } 1410 if (pcbddc->dbg_flag) { 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1412 for (j=0;j<B_neigs;j++) { 1413 if (eigs[j] == 0.0) { 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1415 } else { 1416 if (pcbddc->use_deluxe_scaling) { 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1418 } else { 1419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1420 } 1421 } 1422 } 1423 } 1424 } else { 1425 /* TODO */ 1426 } 1427 } 1428 /* change the basis back to the original one */ 1429 if (sub_schurs->change) { 1430 Mat change,phi,phit; 1431 1432 if (pcbddc->dbg_flag > 1) { 1433 PetscInt ii; 1434 for (ii=0;ii<B_neigs;ii++) { 1435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1436 for (j=0;j<B_N;j++) { 1437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1438 } 1439 } 1440 } 1441 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1442 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1443 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1444 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1445 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1446 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1447 } 1448 maxneigs = PetscMax(B_neigs,maxneigs); 1449 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1450 if (B_neigs) { 1451 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1452 1453 if (pcbddc->dbg_flag > 1) { 1454 PetscInt ii; 1455 for (ii=0;ii<B_neigs;ii++) { 1456 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1457 for (j=0;j<B_N;j++) { 1458 #if defined(PETSC_USE_COMPLEX) 1459 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1460 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1462 #else 1463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1464 #endif 1465 } 1466 } 1467 } 1468 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1469 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1470 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1471 cum++; 1472 } 1473 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1474 /* shift for next computation */ 1475 cumarray += subset_size*subset_size; 1476 } 1477 if (pcbddc->dbg_flag) { 1478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1479 } 1480 1481 if (mss) { 1482 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1483 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1484 /* destroy matrices (junk) */ 1485 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1486 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1487 } 1488 if (allocated_S_St) { 1489 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1490 } 1491 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1492 #if defined(PETSC_USE_COMPLEX) 1493 ierr = PetscFree(rwork);CHKERRQ(ierr); 1494 #endif 1495 if (pcbddc->dbg_flag) { 1496 PetscInt maxneigs_r; 1497 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1498 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "PCBDDCSetUpSolvers" 1505 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1506 { 1507 PetscScalar *coarse_submat_vals; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1512 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1513 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1514 1515 /* Setup local neumann solver ksp_R */ 1516 /* PCBDDCSetUpLocalScatters should be called first! */ 1517 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1518 1519 /* 1520 Setup local correction and local part of coarse basis. 1521 Gives back the dense local part of the coarse matrix in column major ordering 1522 */ 1523 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1524 1525 /* Compute total number of coarse nodes and setup coarse solver */ 1526 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1527 1528 /* free */ 1529 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCResetCustomization" 1535 PetscErrorCode PCBDDCResetCustomization(PC pc) 1536 { 1537 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1542 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1543 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1544 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1545 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1546 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1547 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1548 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1549 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1550 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1551 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "PCBDDCResetTopography" 1557 PetscErrorCode PCBDDCResetTopography(PC pc) 1558 { 1559 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1560 PetscInt i; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1565 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1566 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1567 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1568 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1569 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1570 for (i=0;i<pcbddc->n_local_subs;i++) { 1571 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1572 } 1573 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1574 if (pcbddc->sub_schurs) { 1575 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "PCBDDCResetSolvers" 1582 PetscErrorCode PCBDDCResetSolvers(PC pc) 1583 { 1584 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1585 PetscErrorCode ierr; 1586 1587 PetscFunctionBegin; 1588 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1589 if (pcbddc->coarse_phi_B) { 1590 PetscScalar *array; 1591 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1592 ierr = PetscFree(array);CHKERRQ(ierr); 1593 } 1594 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1595 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1596 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1597 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1598 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1599 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1600 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1601 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1602 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1603 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1604 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1605 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1606 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1607 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1608 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1609 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1610 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1611 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1612 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1613 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1614 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1615 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1616 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1617 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1618 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1619 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1621 if (pcbddc->benign_zerodiag_subs) { 1622 PetscInt i; 1623 for (i=0;i<pcbddc->benign_n;i++) { 1624 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1625 } 1626 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1627 } 1628 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1634 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1635 { 1636 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1637 PC_IS *pcis = (PC_IS*)pc->data; 1638 VecType impVecType; 1639 PetscInt n_constraints,n_R,old_size; 1640 PetscErrorCode ierr; 1641 1642 PetscFunctionBegin; 1643 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1644 /* get sizes */ 1645 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1646 n_R = pcis->n - pcbddc->n_vertices; 1647 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1648 /* local work vectors (try to avoid unneeded work)*/ 1649 /* R nodes */ 1650 old_size = -1; 1651 if (pcbddc->vec1_R) { 1652 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1653 } 1654 if (n_R != old_size) { 1655 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1656 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1657 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1658 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1659 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1660 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1661 } 1662 /* local primal dofs */ 1663 old_size = -1; 1664 if (pcbddc->vec1_P) { 1665 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1666 } 1667 if (pcbddc->local_primal_size != old_size) { 1668 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1669 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1670 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1671 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1672 } 1673 /* local explicit constraints */ 1674 old_size = -1; 1675 if (pcbddc->vec1_C) { 1676 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1677 } 1678 if (n_constraints && n_constraints != old_size) { 1679 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1680 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1681 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1682 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "PCBDDCSetUpCorrection" 1689 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1690 { 1691 PetscErrorCode ierr; 1692 /* pointers to pcis and pcbddc */ 1693 PC_IS* pcis = (PC_IS*)pc->data; 1694 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1695 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1696 /* submatrices of local problem */ 1697 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1698 /* submatrices of local coarse problem */ 1699 Mat S_VV,S_CV,S_VC,S_CC; 1700 /* working matrices */ 1701 Mat C_CR; 1702 /* additional working stuff */ 1703 PC pc_R; 1704 Mat F; 1705 Vec dummy_vec; 1706 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1707 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1708 PetscScalar *work; 1709 PetscInt *idx_V_B; 1710 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1711 PetscInt i,n_R,n_D,n_B; 1712 1713 /* some shortcuts to scalars */ 1714 PetscScalar one=1.0,m_one=-1.0; 1715 1716 PetscFunctionBegin; 1717 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1718 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1719 } 1720 1721 /* Set Non-overlapping dimensions */ 1722 n_vertices = pcbddc->n_vertices; 1723 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1724 n_B = pcis->n_B; 1725 n_D = pcis->n - n_B; 1726 n_R = pcis->n - n_vertices; 1727 1728 /* vertices in boundary numbering */ 1729 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1730 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1731 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1732 1733 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1734 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1735 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1736 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1738 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1740 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1742 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1743 1744 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1745 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1746 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1747 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1748 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1749 lda_rhs = n_R; 1750 need_benign_correction = PETSC_FALSE; 1751 if (isLU || isILU || isCHOL) { 1752 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1753 } else if (sub_schurs && sub_schurs->reuse_solver) { 1754 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1755 MatFactorType type; 1756 1757 F = reuse_solver->F; 1758 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1759 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1760 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1761 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1762 } else { 1763 F = NULL; 1764 } 1765 1766 /* allocate workspace */ 1767 n = 0; 1768 if (n_constraints) { 1769 n += lda_rhs*n_constraints; 1770 } 1771 if (n_vertices) { 1772 n = PetscMax(2*lda_rhs*n_vertices,n); 1773 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1774 } 1775 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1776 1777 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1778 dummy_vec = NULL; 1779 if (need_benign_correction && lda_rhs != n_R && F) { 1780 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1781 } 1782 1783 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1784 if (n_constraints) { 1785 Mat M1,M2,M3,C_B; 1786 IS is_aux; 1787 PetscScalar *array,*array2; 1788 1789 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1790 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1791 1792 /* Extract constraints on R nodes: C_{CR} */ 1793 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1794 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1795 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1796 1797 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1798 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1799 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1800 for (i=0;i<n_constraints;i++) { 1801 const PetscScalar *row_cmat_values; 1802 const PetscInt *row_cmat_indices; 1803 PetscInt size_of_constraint,j; 1804 1805 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1806 for (j=0;j<size_of_constraint;j++) { 1807 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1808 } 1809 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1810 } 1811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1812 if (F) { 1813 Mat B; 1814 1815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1816 if (need_benign_correction) { 1817 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1818 1819 /* rhs is already zero on interior dofs, no need to change the rhs */ 1820 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1821 } 1822 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1823 if (need_benign_correction) { 1824 PetscScalar *marr; 1825 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1826 1827 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1828 if (lda_rhs != n_R) { 1829 for (i=0;i<n_constraints;i++) { 1830 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1831 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1832 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1833 } 1834 } else { 1835 for (i=0;i<n_constraints;i++) { 1836 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1837 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1838 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1839 } 1840 } 1841 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1842 } 1843 ierr = MatDestroy(&B);CHKERRQ(ierr); 1844 } else { 1845 PetscScalar *marr; 1846 1847 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1848 for (i=0;i<n_constraints;i++) { 1849 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1850 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1851 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1852 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1853 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1854 } 1855 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1856 } 1857 if (!pcbddc->switch_static) { 1858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1859 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1860 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1861 for (i=0;i<n_constraints;i++) { 1862 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1863 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1867 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1868 } 1869 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1870 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1871 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1872 } else { 1873 if (lda_rhs != n_R) { 1874 IS dummy; 1875 1876 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1877 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1878 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1879 } else { 1880 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1881 pcbddc->local_auxmat2 = local_auxmat2_R; 1882 } 1883 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1884 } 1885 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1886 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1887 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1888 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1889 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1890 if (isCHOL) { 1891 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1892 } else { 1893 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1894 } 1895 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1896 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1897 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1898 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1899 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1900 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1901 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1902 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1903 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1904 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1905 } 1906 1907 /* Get submatrices from subdomain matrix */ 1908 if (n_vertices) { 1909 IS is_aux; 1910 1911 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1912 IS tis; 1913 1914 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1915 ierr = ISSort(tis);CHKERRQ(ierr); 1916 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1917 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1918 } else { 1919 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1920 } 1921 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1922 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1923 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1924 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1925 } 1926 1927 /* Matrix of coarse basis functions (local) */ 1928 if (pcbddc->coarse_phi_B) { 1929 PetscInt on_B,on_primal,on_D=n_D; 1930 if (pcbddc->coarse_phi_D) { 1931 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1932 } 1933 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1934 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1935 PetscScalar *marray; 1936 1937 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1938 ierr = PetscFree(marray);CHKERRQ(ierr); 1939 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1940 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1941 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1943 } 1944 } 1945 1946 if (!pcbddc->coarse_phi_B) { 1947 PetscScalar *marray; 1948 1949 n = n_B*pcbddc->local_primal_size; 1950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1951 n += n_D*pcbddc->local_primal_size; 1952 } 1953 if (!pcbddc->symmetric_primal) { 1954 n *= 2; 1955 } 1956 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1958 n = n_B*pcbddc->local_primal_size; 1959 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1960 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1961 n += n_D*pcbddc->local_primal_size; 1962 } 1963 if (!pcbddc->symmetric_primal) { 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1965 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1966 n = n_B*pcbddc->local_primal_size; 1967 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1968 } 1969 } else { 1970 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1971 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1972 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1973 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1974 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1975 } 1976 } 1977 } 1978 1979 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1980 p0_lidx_I = NULL; 1981 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1982 const PetscInt *idxs; 1983 1984 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1985 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1986 for (i=0;i<pcbddc->benign_n;i++) { 1987 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1988 } 1989 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1990 } 1991 1992 /* vertices */ 1993 if (n_vertices) { 1994 1995 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1996 1997 if (n_R) { 1998 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1999 PetscBLASInt B_N,B_one = 1; 2000 PetscScalar *x,*y; 2001 PetscBool isseqaij; 2002 2003 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2004 if (need_benign_correction) { 2005 ISLocalToGlobalMapping RtoN; 2006 IS is_p0; 2007 PetscInt *idxs_p0,n; 2008 2009 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2010 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2011 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2012 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2013 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2014 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2015 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2016 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2017 } 2018 2019 if (lda_rhs == n_R) { 2020 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2021 } else { 2022 PetscScalar *av,*array; 2023 const PetscInt *xadj,*adjncy; 2024 PetscInt n; 2025 PetscBool flg_row; 2026 2027 array = work+lda_rhs*n_vertices; 2028 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2029 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2030 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2031 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2032 for (i=0;i<n;i++) { 2033 PetscInt j; 2034 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2035 } 2036 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2037 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2038 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2039 } 2040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2041 if (need_benign_correction) { 2042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2043 PetscScalar *marr; 2044 2045 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2046 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2047 2048 | 0 0 0 | (V) 2049 L = | 0 0 -1 | (P-p0) 2050 | 0 0 -1 | (p0) 2051 2052 */ 2053 for (i=0;i<reuse_solver->benign_n;i++) { 2054 const PetscScalar *vals; 2055 const PetscInt *idxs,*idxs_zero; 2056 PetscInt n,j,nz; 2057 2058 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2059 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2060 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2061 for (j=0;j<n;j++) { 2062 PetscScalar val = vals[j]; 2063 PetscInt k,col = idxs[j]; 2064 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2065 } 2066 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2067 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2068 } 2069 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2070 } 2071 if (F) { 2072 /* need to correct the rhs */ 2073 if (need_benign_correction) { 2074 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2075 PetscScalar *marr; 2076 2077 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2078 if (lda_rhs != n_R) { 2079 for (i=0;i<n_vertices;i++) { 2080 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2081 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2082 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2083 } 2084 } else { 2085 for (i=0;i<n_vertices;i++) { 2086 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2087 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2088 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2089 } 2090 } 2091 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2092 } 2093 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2094 /* need to correct the solution */ 2095 if (need_benign_correction) { 2096 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2097 PetscScalar *marr; 2098 2099 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2100 if (lda_rhs != n_R) { 2101 for (i=0;i<n_vertices;i++) { 2102 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2103 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2104 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2105 } 2106 } else { 2107 for (i=0;i<n_vertices;i++) { 2108 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2109 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2110 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2111 } 2112 } 2113 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2114 } 2115 } else { 2116 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2117 for (i=0;i<n_vertices;i++) { 2118 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2119 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2120 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2121 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2122 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2123 } 2124 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2125 } 2126 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2127 /* S_VV and S_CV */ 2128 if (n_constraints) { 2129 Mat B; 2130 2131 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2132 for (i=0;i<n_vertices;i++) { 2133 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2134 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2135 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2137 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2138 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2139 } 2140 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2141 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2142 ierr = MatDestroy(&B);CHKERRQ(ierr); 2143 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2144 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2145 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2147 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2148 ierr = MatDestroy(&B);CHKERRQ(ierr); 2149 } 2150 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2151 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2152 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2153 } 2154 if (lda_rhs != n_R) { 2155 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2157 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2158 } 2159 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2160 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2161 if (need_benign_correction) { 2162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2163 PetscScalar *marr,*sums; 2164 2165 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2166 ierr = MatDenseGetArray(S_VVt,&marr); 2167 for (i=0;i<reuse_solver->benign_n;i++) { 2168 const PetscScalar *vals; 2169 const PetscInt *idxs,*idxs_zero; 2170 PetscInt n,j,nz; 2171 2172 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2173 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2174 for (j=0;j<n_vertices;j++) { 2175 PetscInt k; 2176 sums[j] = 0.; 2177 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2178 } 2179 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2180 for (j=0;j<n;j++) { 2181 PetscScalar val = vals[j]; 2182 PetscInt k; 2183 for (k=0;k<n_vertices;k++) { 2184 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2185 } 2186 } 2187 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2188 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2189 } 2190 ierr = PetscFree(sums);CHKERRQ(ierr); 2191 ierr = MatDenseRestoreArray(S_VVt,&marr); 2192 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2193 } 2194 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2195 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2196 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2197 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2198 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2199 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2200 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2201 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2202 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2203 } else { 2204 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2205 } 2206 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2207 2208 /* coarse basis functions */ 2209 for (i=0;i<n_vertices;i++) { 2210 PetscScalar *y; 2211 2212 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2213 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2214 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2215 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2216 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2217 y[n_B*i+idx_V_B[i]] = 1.0; 2218 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2219 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2220 2221 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2222 PetscInt j; 2223 2224 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2225 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2226 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2227 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2228 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2229 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2230 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2231 } 2232 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2233 } 2234 /* if n_R == 0 the object is not destroyed */ 2235 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2236 } 2237 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2238 2239 if (n_constraints) { 2240 Mat B; 2241 2242 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2243 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2244 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2245 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2246 if (n_vertices) { 2247 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2248 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2249 } else { 2250 Mat S_VCt; 2251 2252 if (lda_rhs != n_R) { 2253 ierr = MatDestroy(&B);CHKERRQ(ierr); 2254 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2255 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2256 } 2257 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2258 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2259 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2260 } 2261 } 2262 ierr = MatDestroy(&B);CHKERRQ(ierr); 2263 /* coarse basis functions */ 2264 for (i=0;i<n_constraints;i++) { 2265 PetscScalar *y; 2266 2267 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2268 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2269 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2270 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2271 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2273 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2274 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2275 PetscInt j; 2276 2277 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2278 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2279 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2280 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2281 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2282 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2283 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2284 } 2285 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2286 } 2287 } 2288 if (n_constraints) { 2289 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2290 } 2291 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2292 2293 /* coarse matrix entries relative to B_0 */ 2294 if (pcbddc->benign_n) { 2295 Mat B0_B,B0_BPHI; 2296 IS is_dummy; 2297 PetscScalar *data; 2298 PetscInt j; 2299 2300 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2301 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2302 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2303 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2304 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2306 for (j=0;j<pcbddc->benign_n;j++) { 2307 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2308 for (i=0;i<pcbddc->local_primal_size;i++) { 2309 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2310 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2311 } 2312 } 2313 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2314 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2315 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2316 } 2317 2318 /* compute other basis functions for non-symmetric problems */ 2319 if (!pcbddc->symmetric_primal) { 2320 Mat B_V=NULL,B_C=NULL; 2321 PetscScalar *marray; 2322 2323 if (n_constraints) { 2324 Mat S_CCT,C_CRT; 2325 2326 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2327 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2328 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2329 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2330 if (n_vertices) { 2331 Mat S_VCT; 2332 2333 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2334 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2335 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2336 } 2337 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2338 } else { 2339 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2340 } 2341 if (n_vertices && n_R) { 2342 PetscScalar *av,*marray; 2343 const PetscInt *xadj,*adjncy; 2344 PetscInt n; 2345 PetscBool flg_row; 2346 2347 /* B_V = B_V - A_VR^T */ 2348 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2349 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2350 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2351 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2352 for (i=0;i<n;i++) { 2353 PetscInt j; 2354 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2355 } 2356 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2357 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2358 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2359 } 2360 2361 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2362 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2363 for (i=0;i<n_vertices;i++) { 2364 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2365 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2366 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2367 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2368 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2369 } 2370 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2371 if (B_C) { 2372 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2373 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2374 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2375 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2376 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2377 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2378 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2379 } 2380 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2381 } 2382 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2383 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2384 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2385 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2386 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2387 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2388 } 2389 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2390 /* coarse basis functions */ 2391 for (i=0;i<pcbddc->local_primal_size;i++) { 2392 PetscScalar *y; 2393 2394 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2395 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2396 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2397 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2398 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2399 if (i<n_vertices) { 2400 y[n_B*i+idx_V_B[i]] = 1.0; 2401 } 2402 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2403 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2404 2405 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2406 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2407 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2408 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2409 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2410 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2411 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2412 } 2413 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2414 } 2415 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2416 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2417 } 2418 /* free memory */ 2419 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2420 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2421 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2422 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2423 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2424 ierr = PetscFree(work);CHKERRQ(ierr); 2425 if (n_vertices) { 2426 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2427 } 2428 if (n_constraints) { 2429 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2430 } 2431 /* Checking coarse_sub_mat and coarse basis functios */ 2432 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2433 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2434 if (pcbddc->dbg_flag) { 2435 Mat coarse_sub_mat; 2436 Mat AUXMAT,TM1,TM2,TM3,TM4; 2437 Mat coarse_phi_D,coarse_phi_B; 2438 Mat coarse_psi_D,coarse_psi_B; 2439 Mat A_II,A_BB,A_IB,A_BI; 2440 Mat C_B,CPHI; 2441 IS is_dummy; 2442 Vec mones; 2443 MatType checkmattype=MATSEQAIJ; 2444 PetscReal real_value; 2445 2446 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2447 Mat A; 2448 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2449 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2450 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2451 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2452 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2453 ierr = MatDestroy(&A);CHKERRQ(ierr); 2454 } else { 2455 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2456 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2457 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2458 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2459 } 2460 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2461 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2462 if (!pcbddc->symmetric_primal) { 2463 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2464 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2465 } 2466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2467 2468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2471 if (!pcbddc->symmetric_primal) { 2472 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2473 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2475 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2476 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2477 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2478 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2479 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2480 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2481 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2482 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2483 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2484 } else { 2485 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2486 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2487 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2488 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2489 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2490 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2491 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2492 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2493 } 2494 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2495 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2496 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2497 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2498 if (pcbddc->benign_n) { 2499 Mat B0_B,B0_BPHI; 2500 PetscScalar *data,*data2; 2501 PetscInt j; 2502 2503 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2504 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2505 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2506 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2507 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2508 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2509 for (j=0;j<pcbddc->benign_n;j++) { 2510 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2511 for (i=0;i<pcbddc->local_primal_size;i++) { 2512 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2513 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2514 } 2515 } 2516 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2517 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2518 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2519 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2520 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2521 } 2522 #if 0 2523 { 2524 PetscViewer viewer; 2525 char filename[256]; 2526 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2527 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2528 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2529 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2530 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2531 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2532 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2533 if (save_change) { 2534 Mat phi_B; 2535 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2536 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2537 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2538 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2539 } else { 2540 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2541 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2542 } 2543 if (pcbddc->coarse_phi_D) { 2544 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2545 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2546 } 2547 if (pcbddc->coarse_psi_B) { 2548 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2549 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2550 } 2551 if (pcbddc->coarse_psi_D) { 2552 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2553 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2554 } 2555 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2556 } 2557 #endif 2558 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2559 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2560 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2562 2563 /* check constraints */ 2564 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2565 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2566 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2567 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2568 } else { 2569 PetscScalar *data; 2570 Mat tmat; 2571 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2573 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2574 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2575 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2576 } 2577 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2578 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2579 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2580 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2582 if (!pcbddc->symmetric_primal) { 2583 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2584 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2585 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2586 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2588 } 2589 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2590 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2591 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2592 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2594 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2595 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2596 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2597 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2598 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2599 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2600 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2601 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2602 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2603 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2604 if (!pcbddc->symmetric_primal) { 2605 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2606 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2607 } 2608 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2609 } 2610 /* get back data */ 2611 *coarse_submat_vals_n = coarse_submat_vals; 2612 PetscFunctionReturn(0); 2613 } 2614 2615 #undef __FUNCT__ 2616 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2617 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2618 { 2619 Mat *work_mat; 2620 IS isrow_s,iscol_s; 2621 PetscBool rsorted,csorted; 2622 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2623 PetscErrorCode ierr; 2624 2625 PetscFunctionBegin; 2626 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2627 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2628 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2629 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2630 2631 if (!rsorted) { 2632 const PetscInt *idxs; 2633 PetscInt *idxs_sorted,i; 2634 2635 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2636 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2637 for (i=0;i<rsize;i++) { 2638 idxs_perm_r[i] = i; 2639 } 2640 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2641 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2642 for (i=0;i<rsize;i++) { 2643 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2644 } 2645 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2646 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2647 } else { 2648 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2649 isrow_s = isrow; 2650 } 2651 2652 if (!csorted) { 2653 if (isrow == iscol) { 2654 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2655 iscol_s = isrow_s; 2656 } else { 2657 const PetscInt *idxs; 2658 PetscInt *idxs_sorted,i; 2659 2660 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2661 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2662 for (i=0;i<csize;i++) { 2663 idxs_perm_c[i] = i; 2664 } 2665 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2666 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2667 for (i=0;i<csize;i++) { 2668 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2669 } 2670 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2671 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2672 } 2673 } else { 2674 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2675 iscol_s = iscol; 2676 } 2677 2678 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2679 2680 if (!rsorted || !csorted) { 2681 Mat new_mat; 2682 IS is_perm_r,is_perm_c; 2683 2684 if (!rsorted) { 2685 PetscInt *idxs_r,i; 2686 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2687 for (i=0;i<rsize;i++) { 2688 idxs_r[idxs_perm_r[i]] = i; 2689 } 2690 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2691 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2692 } else { 2693 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2694 } 2695 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2696 2697 if (!csorted) { 2698 if (isrow_s == iscol_s) { 2699 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2700 is_perm_c = is_perm_r; 2701 } else { 2702 PetscInt *idxs_c,i; 2703 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2704 for (i=0;i<csize;i++) { 2705 idxs_c[idxs_perm_c[i]] = i; 2706 } 2707 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2708 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2709 } 2710 } else { 2711 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2712 } 2713 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2714 2715 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2716 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2717 work_mat[0] = new_mat; 2718 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2719 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2720 } 2721 2722 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2723 *B = work_mat[0]; 2724 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2725 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2726 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2727 PetscFunctionReturn(0); 2728 } 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2732 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2733 { 2734 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2735 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2736 Mat new_mat; 2737 IS is_local,is_global; 2738 PetscInt local_size; 2739 PetscBool isseqaij; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2744 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2745 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2746 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2747 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2748 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2749 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2750 2751 /* check */ 2752 if (pcbddc->dbg_flag) { 2753 Vec x,x_change; 2754 PetscReal error; 2755 2756 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2757 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2758 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2759 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2761 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2762 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2763 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2764 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2765 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2766 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2768 ierr = VecDestroy(&x);CHKERRQ(ierr); 2769 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2770 } 2771 2772 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2773 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2774 if (isseqaij) { 2775 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2776 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2777 } else { 2778 Mat work_mat; 2779 2780 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2781 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2782 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2783 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2784 } 2785 if (matis->A->symmetric_set) { 2786 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2787 #if !defined(PETSC_USE_COMPLEX) 2788 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2789 #endif 2790 } 2791 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2797 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2798 { 2799 PC_IS* pcis = (PC_IS*)(pc->data); 2800 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2801 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2802 PetscInt *idx_R_local=NULL; 2803 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2804 PetscInt vbs,bs; 2805 PetscBT bitmask=NULL; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 /* 2810 No need to setup local scatters if 2811 - primal space is unchanged 2812 AND 2813 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2814 AND 2815 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2816 */ 2817 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2818 PetscFunctionReturn(0); 2819 } 2820 /* destroy old objects */ 2821 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2822 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2823 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2824 /* Set Non-overlapping dimensions */ 2825 n_B = pcis->n_B; 2826 n_D = pcis->n - n_B; 2827 n_vertices = pcbddc->n_vertices; 2828 2829 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2830 2831 /* create auxiliary bitmask and allocate workspace */ 2832 if (!sub_schurs || !sub_schurs->reuse_solver) { 2833 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2834 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2835 for (i=0;i<n_vertices;i++) { 2836 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2837 } 2838 2839 for (i=0, n_R=0; i<pcis->n; i++) { 2840 if (!PetscBTLookup(bitmask,i)) { 2841 idx_R_local[n_R++] = i; 2842 } 2843 } 2844 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2845 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2846 2847 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2848 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2849 } 2850 2851 /* Block code */ 2852 vbs = 1; 2853 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2854 if (bs>1 && !(n_vertices%bs)) { 2855 PetscBool is_blocked = PETSC_TRUE; 2856 PetscInt *vary; 2857 if (!sub_schurs || !sub_schurs->reuse_solver) { 2858 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2859 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2860 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2861 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2862 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2863 for (i=0; i<pcis->n/bs; i++) { 2864 if (vary[i]!=0 && vary[i]!=bs) { 2865 is_blocked = PETSC_FALSE; 2866 break; 2867 } 2868 } 2869 ierr = PetscFree(vary);CHKERRQ(ierr); 2870 } else { 2871 /* Verify directly the R set */ 2872 for (i=0; i<n_R/bs; i++) { 2873 PetscInt j,node=idx_R_local[bs*i]; 2874 for (j=1; j<bs; j++) { 2875 if (node != idx_R_local[bs*i+j]-j) { 2876 is_blocked = PETSC_FALSE; 2877 break; 2878 } 2879 } 2880 } 2881 } 2882 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2883 vbs = bs; 2884 for (i=0;i<n_R/vbs;i++) { 2885 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2886 } 2887 } 2888 } 2889 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2890 if (sub_schurs && sub_schurs->reuse_solver) { 2891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2892 2893 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2894 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2895 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2896 reuse_solver->is_R = pcbddc->is_R_local; 2897 } else { 2898 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2899 } 2900 2901 /* print some info if requested */ 2902 if (pcbddc->dbg_flag) { 2903 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2905 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2906 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2907 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2908 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2909 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2910 } 2911 2912 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2913 if (!sub_schurs || !sub_schurs->reuse_solver) { 2914 IS is_aux1,is_aux2; 2915 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2916 2917 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2918 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2919 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2920 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2921 for (i=0; i<n_D; i++) { 2922 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2923 } 2924 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2925 for (i=0, j=0; i<n_R; i++) { 2926 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2927 aux_array1[j++] = i; 2928 } 2929 } 2930 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2931 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2932 for (i=0, j=0; i<n_B; i++) { 2933 if (!PetscBTLookup(bitmask,is_indices[i])) { 2934 aux_array2[j++] = i; 2935 } 2936 } 2937 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2938 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2939 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2940 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2941 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2942 2943 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2944 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2945 for (i=0, j=0; i<n_R; i++) { 2946 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2947 aux_array1[j++] = i; 2948 } 2949 } 2950 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2951 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2952 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2953 } 2954 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2955 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2956 } else { 2957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2958 IS tis; 2959 PetscInt schur_size; 2960 2961 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2962 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2963 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2964 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2965 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2966 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2967 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2968 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2969 } 2970 } 2971 PetscFunctionReturn(0); 2972 } 2973 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2977 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2978 { 2979 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2980 PC_IS *pcis = (PC_IS*)pc->data; 2981 PC pc_temp; 2982 Mat A_RR; 2983 MatReuse reuse; 2984 PetscScalar m_one = -1.0; 2985 PetscReal value; 2986 PetscInt n_D,n_R; 2987 PetscBool check_corr[2],issbaij; 2988 PetscErrorCode ierr; 2989 /* prefixes stuff */ 2990 char dir_prefix[256],neu_prefix[256],str_level[16]; 2991 size_t len; 2992 2993 PetscFunctionBegin; 2994 2995 /* compute prefixes */ 2996 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2997 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2998 if (!pcbddc->current_level) { 2999 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3000 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3001 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3002 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3003 } else { 3004 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3005 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3006 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3007 len -= 15; /* remove "pc_bddc_coarse_" */ 3008 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3009 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3010 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3011 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3012 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3013 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3014 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3015 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3016 } 3017 3018 /* DIRICHLET PROBLEM */ 3019 if (dirichlet) { 3020 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3021 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3022 if (!sub_schurs || !sub_schurs->reuse_solver) { 3023 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3024 } 3025 if (pcbddc->dbg_flag) { 3026 Mat A_IIn; 3027 3028 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3029 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3030 pcis->A_II = A_IIn; 3031 } 3032 } 3033 if (pcbddc->local_mat->symmetric_set) { 3034 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3035 } 3036 /* Matrix for Dirichlet problem is pcis->A_II */ 3037 n_D = pcis->n - pcis->n_B; 3038 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3039 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3040 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3041 /* default */ 3042 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3043 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3044 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3045 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3046 if (issbaij) { 3047 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3048 } else { 3049 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3050 } 3051 /* Allow user's customization */ 3052 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3053 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3054 } 3055 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3056 if (sub_schurs && sub_schurs->reuse_solver) { 3057 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3058 3059 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3060 } 3061 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3062 if (!n_D) { 3063 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3064 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3065 } 3066 /* Set Up KSP for Dirichlet problem of BDDC */ 3067 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3068 /* set ksp_D into pcis data */ 3069 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3070 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3071 pcis->ksp_D = pcbddc->ksp_D; 3072 } 3073 3074 /* NEUMANN PROBLEM */ 3075 A_RR = 0; 3076 if (neumann) { 3077 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3078 PetscInt ibs,mbs; 3079 PetscBool issbaij; 3080 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3081 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3082 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3083 if (pcbddc->ksp_R) { /* already created ksp */ 3084 PetscInt nn_R; 3085 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3086 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3087 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3088 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3089 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3091 reuse = MAT_INITIAL_MATRIX; 3092 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3093 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3094 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3095 reuse = MAT_INITIAL_MATRIX; 3096 } else { /* safe to reuse the matrix */ 3097 reuse = MAT_REUSE_MATRIX; 3098 } 3099 } 3100 /* last check */ 3101 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3102 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3103 reuse = MAT_INITIAL_MATRIX; 3104 } 3105 } else { /* first time, so we need to create the matrix */ 3106 reuse = MAT_INITIAL_MATRIX; 3107 } 3108 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3109 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3110 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3111 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3112 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3113 if (matis->A == pcbddc->local_mat) { 3114 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3115 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3116 } else { 3117 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3118 } 3119 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3120 if (matis->A == pcbddc->local_mat) { 3121 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3122 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3123 } else { 3124 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3125 } 3126 } 3127 /* extract A_RR */ 3128 if (sub_schurs && sub_schurs->reuse_solver) { 3129 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3130 3131 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3132 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3133 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3134 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3135 } else { 3136 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3137 } 3138 } else { 3139 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3140 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3141 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3142 } 3143 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3144 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3145 } 3146 if (pcbddc->local_mat->symmetric_set) { 3147 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3148 } 3149 if (!pcbddc->ksp_R) { /* create object if not present */ 3150 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3151 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3152 /* default */ 3153 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3154 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3155 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3156 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3157 if (issbaij) { 3158 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3159 } else { 3160 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3161 } 3162 /* Allow user's customization */ 3163 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3164 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3165 } 3166 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3167 if (!n_R) { 3168 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3169 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3170 } 3171 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3172 /* Reuse solver if it is present */ 3173 if (sub_schurs && sub_schurs->reuse_solver) { 3174 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3175 3176 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3177 } 3178 /* Set Up KSP for Neumann problem of BDDC */ 3179 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3180 } 3181 3182 if (pcbddc->dbg_flag) { 3183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3184 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3186 } 3187 3188 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3189 check_corr[0] = check_corr[1] = PETSC_FALSE; 3190 if (pcbddc->NullSpace_corr[0]) { 3191 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3192 } 3193 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3194 check_corr[0] = PETSC_TRUE; 3195 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3196 } 3197 if (neumann && pcbddc->NullSpace_corr[2]) { 3198 check_corr[1] = PETSC_TRUE; 3199 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3200 } 3201 3202 /* check Dirichlet and Neumann solvers */ 3203 if (pcbddc->dbg_flag) { 3204 if (dirichlet) { /* Dirichlet */ 3205 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3206 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3207 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3208 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3209 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3211 if (check_corr[0]) { 3212 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3213 } 3214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3215 } 3216 if (neumann) { /* Neumann */ 3217 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3218 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3219 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3220 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3221 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3223 if (check_corr[1]) { 3224 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3225 } 3226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 } 3228 } 3229 /* free Neumann problem's matrix */ 3230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3231 PetscFunctionReturn(0); 3232 } 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3236 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3237 { 3238 PetscErrorCode ierr; 3239 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3240 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3241 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3242 3243 PetscFunctionBegin; 3244 if (!reuse_solver) { 3245 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3246 } 3247 if (!pcbddc->switch_static) { 3248 if (applytranspose && pcbddc->local_auxmat1) { 3249 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3250 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3251 } 3252 if (!reuse_solver) { 3253 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3254 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3255 } else { 3256 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3257 3258 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3260 } 3261 } else { 3262 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3263 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3264 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3265 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3266 if (applytranspose && pcbddc->local_auxmat1) { 3267 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3268 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3269 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3270 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3271 } 3272 } 3273 if (!reuse_solver || pcbddc->switch_static) { 3274 if (applytranspose) { 3275 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3276 } else { 3277 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3278 } 3279 } else { 3280 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3281 3282 if (applytranspose) { 3283 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3284 } else { 3285 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3286 } 3287 } 3288 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3289 if (!pcbddc->switch_static) { 3290 if (!reuse_solver) { 3291 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3292 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 } else { 3294 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3295 3296 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3297 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3298 } 3299 if (!applytranspose && pcbddc->local_auxmat1) { 3300 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3301 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3302 } 3303 } else { 3304 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3305 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3306 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3307 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3308 if (!applytranspose && pcbddc->local_auxmat1) { 3309 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3310 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3311 } 3312 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3313 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3314 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3315 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3316 } 3317 PetscFunctionReturn(0); 3318 } 3319 3320 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3323 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3324 { 3325 PetscErrorCode ierr; 3326 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3327 PC_IS* pcis = (PC_IS*) (pc->data); 3328 const PetscScalar zero = 0.0; 3329 3330 PetscFunctionBegin; 3331 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3332 if (!pcbddc->benign_apply_coarse_only) { 3333 if (applytranspose) { 3334 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3335 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3336 } else { 3337 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3338 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3339 } 3340 } else { 3341 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3342 } 3343 3344 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3345 if (pcbddc->benign_n) { 3346 PetscScalar *array; 3347 PetscInt j; 3348 3349 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3350 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3351 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3352 } 3353 3354 /* start communications from local primal nodes to rhs of coarse solver */ 3355 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3356 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3357 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 3359 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3360 if (pcbddc->coarse_ksp) { 3361 Mat coarse_mat; 3362 Vec rhs,sol; 3363 MatNullSpace nullsp; 3364 PetscBool isbddc = PETSC_FALSE; 3365 3366 if (pcbddc->benign_have_null) { 3367 PC coarse_pc; 3368 3369 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3370 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3371 /* we need to propagate to coarser levels the need for a possible benign correction */ 3372 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3373 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3374 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3375 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3376 } 3377 } 3378 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3379 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3380 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3381 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3382 if (nullsp) { 3383 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3384 } 3385 if (applytranspose) { 3386 if (pcbddc->benign_apply_coarse_only) { 3387 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3388 } else { 3389 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3390 } 3391 } else { 3392 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3393 PC coarse_pc; 3394 3395 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3396 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3397 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3398 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3399 } else { 3400 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3401 } 3402 } 3403 /* we don't need the benign correction at coarser levels anymore */ 3404 if (pcbddc->benign_have_null && isbddc) { 3405 PC coarse_pc; 3406 PC_BDDC* coarsepcbddc; 3407 3408 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3409 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3410 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3411 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3412 } 3413 if (nullsp) { 3414 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3415 } 3416 } 3417 3418 /* Local solution on R nodes */ 3419 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3420 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3421 } 3422 /* communications from coarse sol to local primal nodes */ 3423 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3424 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3425 3426 /* Sum contributions from the two levels */ 3427 if (!pcbddc->benign_apply_coarse_only) { 3428 if (applytranspose) { 3429 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3430 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3431 } else { 3432 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3433 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3434 } 3435 /* store p0 */ 3436 if (pcbddc->benign_n) { 3437 PetscScalar *array; 3438 PetscInt j; 3439 3440 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3441 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3442 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3443 } 3444 } else { /* expand the coarse solution */ 3445 if (applytranspose) { 3446 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3447 } else { 3448 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3449 } 3450 } 3451 PetscFunctionReturn(0); 3452 } 3453 3454 #undef __FUNCT__ 3455 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3456 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3457 { 3458 PetscErrorCode ierr; 3459 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3460 PetscScalar *array; 3461 Vec from,to; 3462 3463 PetscFunctionBegin; 3464 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3465 from = pcbddc->coarse_vec; 3466 to = pcbddc->vec1_P; 3467 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3468 Vec tvec; 3469 3470 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3471 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3472 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3473 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3474 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3475 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3476 } 3477 } else { /* from local to global -> put data in coarse right hand side */ 3478 from = pcbddc->vec1_P; 3479 to = pcbddc->coarse_vec; 3480 } 3481 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 #undef __FUNCT__ 3486 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3487 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3488 { 3489 PetscErrorCode ierr; 3490 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3491 PetscScalar *array; 3492 Vec from,to; 3493 3494 PetscFunctionBegin; 3495 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3496 from = pcbddc->coarse_vec; 3497 to = pcbddc->vec1_P; 3498 } else { /* from local to global -> put data in coarse right hand side */ 3499 from = pcbddc->vec1_P; 3500 to = pcbddc->coarse_vec; 3501 } 3502 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3503 if (smode == SCATTER_FORWARD) { 3504 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3505 Vec tvec; 3506 3507 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3508 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3509 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3510 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3511 } 3512 } else { 3513 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3514 ierr = VecResetArray(from);CHKERRQ(ierr); 3515 } 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 /* uncomment for testing purposes */ 3521 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3524 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3525 { 3526 PetscErrorCode ierr; 3527 PC_IS* pcis = (PC_IS*)(pc->data); 3528 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3529 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3530 /* one and zero */ 3531 PetscScalar one=1.0,zero=0.0; 3532 /* space to store constraints and their local indices */ 3533 PetscScalar *constraints_data; 3534 PetscInt *constraints_idxs,*constraints_idxs_B; 3535 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3536 PetscInt *constraints_n; 3537 /* iterators */ 3538 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3539 /* BLAS integers */ 3540 PetscBLASInt lwork,lierr; 3541 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3542 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3543 /* reuse */ 3544 PetscInt olocal_primal_size,olocal_primal_size_cc; 3545 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3546 /* change of basis */ 3547 PetscBool qr_needed; 3548 PetscBT change_basis,qr_needed_idx; 3549 /* auxiliary stuff */ 3550 PetscInt *nnz,*is_indices; 3551 PetscInt ncc; 3552 /* some quantities */ 3553 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3554 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3555 3556 PetscFunctionBegin; 3557 /* Destroy Mat objects computed previously */ 3558 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3559 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3560 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3561 /* save info on constraints from previous setup (if any) */ 3562 olocal_primal_size = pcbddc->local_primal_size; 3563 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3564 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3565 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3566 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3567 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3568 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3569 3570 if (!pcbddc->adaptive_selection) { 3571 IS ISForVertices,*ISForFaces,*ISForEdges; 3572 MatNullSpace nearnullsp; 3573 const Vec *nearnullvecs; 3574 Vec *localnearnullsp; 3575 PetscScalar *array; 3576 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3577 PetscBool nnsp_has_cnst; 3578 /* LAPACK working arrays for SVD or POD */ 3579 PetscBool skip_lapack,boolforchange; 3580 PetscScalar *work; 3581 PetscReal *singular_vals; 3582 #if defined(PETSC_USE_COMPLEX) 3583 PetscReal *rwork; 3584 #endif 3585 #if defined(PETSC_MISSING_LAPACK_GESVD) 3586 PetscScalar *temp_basis,*correlation_mat; 3587 #else 3588 PetscBLASInt dummy_int=1; 3589 PetscScalar dummy_scalar=1.; 3590 #endif 3591 3592 /* Get index sets for faces, edges and vertices from graph */ 3593 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3594 /* print some info */ 3595 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3596 PetscInt nv; 3597 3598 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3599 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3600 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3605 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3606 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3607 } 3608 3609 /* free unneeded index sets */ 3610 if (!pcbddc->use_vertices) { 3611 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3612 } 3613 if (!pcbddc->use_edges) { 3614 for (i=0;i<n_ISForEdges;i++) { 3615 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3616 } 3617 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3618 n_ISForEdges = 0; 3619 } 3620 if (!pcbddc->use_faces) { 3621 for (i=0;i<n_ISForFaces;i++) { 3622 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3623 } 3624 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3625 n_ISForFaces = 0; 3626 } 3627 3628 /* check if near null space is attached to global mat */ 3629 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3630 if (nearnullsp) { 3631 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3632 /* remove any stored info */ 3633 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3634 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3635 /* store information for BDDC solver reuse */ 3636 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3637 pcbddc->onearnullspace = nearnullsp; 3638 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3639 for (i=0;i<nnsp_size;i++) { 3640 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3641 } 3642 } else { /* if near null space is not provided BDDC uses constants by default */ 3643 nnsp_size = 0; 3644 nnsp_has_cnst = PETSC_TRUE; 3645 } 3646 /* get max number of constraints on a single cc */ 3647 max_constraints = nnsp_size; 3648 if (nnsp_has_cnst) max_constraints++; 3649 3650 /* 3651 Evaluate maximum storage size needed by the procedure 3652 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3653 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3654 There can be multiple constraints per connected component 3655 */ 3656 n_vertices = 0; 3657 if (ISForVertices) { 3658 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3659 } 3660 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3661 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3662 3663 total_counts = n_ISForFaces+n_ISForEdges; 3664 total_counts *= max_constraints; 3665 total_counts += n_vertices; 3666 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3667 3668 total_counts = 0; 3669 max_size_of_constraint = 0; 3670 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3671 IS used_is; 3672 if (i<n_ISForEdges) { 3673 used_is = ISForEdges[i]; 3674 } else { 3675 used_is = ISForFaces[i-n_ISForEdges]; 3676 } 3677 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3678 total_counts += j; 3679 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3680 } 3681 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3682 3683 /* get local part of global near null space vectors */ 3684 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3685 for (k=0;k<nnsp_size;k++) { 3686 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3687 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3688 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3689 } 3690 3691 /* whether or not to skip lapack calls */ 3692 skip_lapack = PETSC_TRUE; 3693 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3694 3695 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3696 if (!skip_lapack) { 3697 PetscScalar temp_work; 3698 3699 #if defined(PETSC_MISSING_LAPACK_GESVD) 3700 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3701 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3702 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3703 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3704 #if defined(PETSC_USE_COMPLEX) 3705 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3706 #endif 3707 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3708 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3709 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3710 lwork = -1; 3711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3712 #if !defined(PETSC_USE_COMPLEX) 3713 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3714 #else 3715 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3716 #endif 3717 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3718 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3719 #else /* on missing GESVD */ 3720 /* SVD */ 3721 PetscInt max_n,min_n; 3722 max_n = max_size_of_constraint; 3723 min_n = max_constraints; 3724 if (max_size_of_constraint < max_constraints) { 3725 min_n = max_size_of_constraint; 3726 max_n = max_constraints; 3727 } 3728 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3729 #if defined(PETSC_USE_COMPLEX) 3730 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3731 #endif 3732 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3733 lwork = -1; 3734 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3735 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3736 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3737 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3738 #if !defined(PETSC_USE_COMPLEX) 3739 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3740 #else 3741 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3742 #endif 3743 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3744 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3745 #endif /* on missing GESVD */ 3746 /* Allocate optimal workspace */ 3747 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3748 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3749 } 3750 /* Now we can loop on constraining sets */ 3751 total_counts = 0; 3752 constraints_idxs_ptr[0] = 0; 3753 constraints_data_ptr[0] = 0; 3754 /* vertices */ 3755 if (n_vertices) { 3756 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3757 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3758 for (i=0;i<n_vertices;i++) { 3759 constraints_n[total_counts] = 1; 3760 constraints_data[total_counts] = 1.0; 3761 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3762 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3763 total_counts++; 3764 } 3765 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3766 n_vertices = total_counts; 3767 } 3768 3769 /* edges and faces */ 3770 total_counts_cc = total_counts; 3771 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3772 IS used_is; 3773 PetscBool idxs_copied = PETSC_FALSE; 3774 3775 if (ncc<n_ISForEdges) { 3776 used_is = ISForEdges[ncc]; 3777 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3778 } else { 3779 used_is = ISForFaces[ncc-n_ISForEdges]; 3780 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3781 } 3782 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3783 3784 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3785 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3786 /* change of basis should not be performed on local periodic nodes */ 3787 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3788 if (nnsp_has_cnst) { 3789 PetscScalar quad_value; 3790 3791 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3792 idxs_copied = PETSC_TRUE; 3793 3794 if (!pcbddc->use_nnsp_true) { 3795 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3796 } else { 3797 quad_value = 1.0; 3798 } 3799 for (j=0;j<size_of_constraint;j++) { 3800 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3801 } 3802 temp_constraints++; 3803 total_counts++; 3804 } 3805 for (k=0;k<nnsp_size;k++) { 3806 PetscReal real_value; 3807 PetscScalar *ptr_to_data; 3808 3809 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3810 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3811 for (j=0;j<size_of_constraint;j++) { 3812 ptr_to_data[j] = array[is_indices[j]]; 3813 } 3814 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3815 /* check if array is null on the connected component */ 3816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3817 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3818 if (real_value > 0.0) { /* keep indices and values */ 3819 temp_constraints++; 3820 total_counts++; 3821 if (!idxs_copied) { 3822 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3823 idxs_copied = PETSC_TRUE; 3824 } 3825 } 3826 } 3827 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3828 valid_constraints = temp_constraints; 3829 if (!pcbddc->use_nnsp_true && temp_constraints) { 3830 if (temp_constraints == 1) { /* just normalize the constraint */ 3831 PetscScalar norm,*ptr_to_data; 3832 3833 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3834 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3835 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3836 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3837 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3838 } else { /* perform SVD */ 3839 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3840 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3841 3842 #if defined(PETSC_MISSING_LAPACK_GESVD) 3843 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3844 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3845 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3846 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3847 from that computed using LAPACKgesvd 3848 -> This is due to a different computation of eigenvectors in LAPACKheev 3849 -> The quality of the POD-computed basis will be the same */ 3850 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3851 /* Store upper triangular part of correlation matrix */ 3852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3853 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3854 for (j=0;j<temp_constraints;j++) { 3855 for (k=0;k<j+1;k++) { 3856 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3857 } 3858 } 3859 /* compute eigenvalues and eigenvectors of correlation matrix */ 3860 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3861 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3862 #if !defined(PETSC_USE_COMPLEX) 3863 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3864 #else 3865 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3866 #endif 3867 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3868 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3869 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3870 j = 0; 3871 while (j < temp_constraints && singular_vals[j] < tol) j++; 3872 total_counts = total_counts-j; 3873 valid_constraints = temp_constraints-j; 3874 /* scale and copy POD basis into used quadrature memory */ 3875 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3877 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3878 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3879 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3881 if (j<temp_constraints) { 3882 PetscInt ii; 3883 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3884 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3885 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3886 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3887 for (k=0;k<temp_constraints-j;k++) { 3888 for (ii=0;ii<size_of_constraint;ii++) { 3889 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3890 } 3891 } 3892 } 3893 #else /* on missing GESVD */ 3894 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3895 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3896 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3897 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3898 #if !defined(PETSC_USE_COMPLEX) 3899 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3900 #else 3901 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3902 #endif 3903 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3904 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3905 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3906 k = temp_constraints; 3907 if (k > size_of_constraint) k = size_of_constraint; 3908 j = 0; 3909 while (j < k && singular_vals[k-j-1] < tol) j++; 3910 valid_constraints = k-j; 3911 total_counts = total_counts-temp_constraints+valid_constraints; 3912 #endif /* on missing GESVD */ 3913 } 3914 } 3915 /* update pointers information */ 3916 if (valid_constraints) { 3917 constraints_n[total_counts_cc] = valid_constraints; 3918 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3919 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3920 /* set change_of_basis flag */ 3921 if (boolforchange) { 3922 PetscBTSet(change_basis,total_counts_cc); 3923 } 3924 total_counts_cc++; 3925 } 3926 } 3927 /* free workspace */ 3928 if (!skip_lapack) { 3929 ierr = PetscFree(work);CHKERRQ(ierr); 3930 #if defined(PETSC_USE_COMPLEX) 3931 ierr = PetscFree(rwork);CHKERRQ(ierr); 3932 #endif 3933 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3934 #if defined(PETSC_MISSING_LAPACK_GESVD) 3935 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3936 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3937 #endif 3938 } 3939 for (k=0;k<nnsp_size;k++) { 3940 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3941 } 3942 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3943 /* free index sets of faces, edges and vertices */ 3944 for (i=0;i<n_ISForFaces;i++) { 3945 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3946 } 3947 if (n_ISForFaces) { 3948 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3949 } 3950 for (i=0;i<n_ISForEdges;i++) { 3951 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3952 } 3953 if (n_ISForEdges) { 3954 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3955 } 3956 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3957 } else { 3958 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3959 3960 total_counts = 0; 3961 n_vertices = 0; 3962 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3963 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3964 } 3965 max_constraints = 0; 3966 total_counts_cc = 0; 3967 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3968 total_counts += pcbddc->adaptive_constraints_n[i]; 3969 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3970 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3971 } 3972 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3973 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3974 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3975 constraints_data = pcbddc->adaptive_constraints_data; 3976 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3977 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3978 total_counts_cc = 0; 3979 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3980 if (pcbddc->adaptive_constraints_n[i]) { 3981 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3982 } 3983 } 3984 #if 0 3985 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3986 for (i=0;i<total_counts_cc;i++) { 3987 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3988 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3989 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3990 printf(" %d",constraints_idxs[j]); 3991 } 3992 printf("\n"); 3993 printf("number of cc: %d\n",constraints_n[i]); 3994 } 3995 for (i=0;i<n_vertices;i++) { 3996 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3997 } 3998 for (i=0;i<sub_schurs->n_subs;i++) { 3999 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4000 } 4001 #endif 4002 4003 max_size_of_constraint = 0; 4004 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4005 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4006 /* Change of basis */ 4007 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4008 if (pcbddc->use_change_of_basis) { 4009 for (i=0;i<sub_schurs->n_subs;i++) { 4010 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4011 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4012 } 4013 } 4014 } 4015 } 4016 pcbddc->local_primal_size = total_counts; 4017 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4018 4019 /* map constraints_idxs in boundary numbering */ 4020 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4021 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4022 4023 /* Create constraint matrix */ 4024 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4025 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4026 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4027 4028 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4029 /* determine if a QR strategy is needed for change of basis */ 4030 qr_needed = PETSC_FALSE; 4031 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4032 total_primal_vertices=0; 4033 pcbddc->local_primal_size_cc = 0; 4034 for (i=0;i<total_counts_cc;i++) { 4035 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4036 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4037 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4038 pcbddc->local_primal_size_cc += 1; 4039 } else if (PetscBTLookup(change_basis,i)) { 4040 for (k=0;k<constraints_n[i];k++) { 4041 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4042 } 4043 pcbddc->local_primal_size_cc += constraints_n[i]; 4044 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4045 PetscBTSet(qr_needed_idx,i); 4046 qr_needed = PETSC_TRUE; 4047 } 4048 } else { 4049 pcbddc->local_primal_size_cc += 1; 4050 } 4051 } 4052 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4053 pcbddc->n_vertices = total_primal_vertices; 4054 /* permute indices in order to have a sorted set of vertices */ 4055 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4056 4057 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4058 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4059 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4060 4061 /* nonzero structure of constraint matrix */ 4062 /* and get reference dof for local constraints */ 4063 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4064 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4065 4066 j = total_primal_vertices; 4067 total_counts = total_primal_vertices; 4068 cum = total_primal_vertices; 4069 for (i=n_vertices;i<total_counts_cc;i++) { 4070 if (!PetscBTLookup(change_basis,i)) { 4071 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4072 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4073 cum++; 4074 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4075 for (k=0;k<constraints_n[i];k++) { 4076 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4077 nnz[j+k] = size_of_constraint; 4078 } 4079 j += constraints_n[i]; 4080 } 4081 } 4082 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4083 ierr = PetscFree(nnz);CHKERRQ(ierr); 4084 4085 /* set values in constraint matrix */ 4086 for (i=0;i<total_primal_vertices;i++) { 4087 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4088 } 4089 total_counts = total_primal_vertices; 4090 for (i=n_vertices;i<total_counts_cc;i++) { 4091 if (!PetscBTLookup(change_basis,i)) { 4092 PetscInt *cols; 4093 4094 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4095 cols = constraints_idxs+constraints_idxs_ptr[i]; 4096 for (k=0;k<constraints_n[i];k++) { 4097 PetscInt row = total_counts+k; 4098 PetscScalar *vals; 4099 4100 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4101 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4102 } 4103 total_counts += constraints_n[i]; 4104 } 4105 } 4106 /* assembling */ 4107 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4108 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4109 4110 /* 4111 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4112 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4113 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4114 */ 4115 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4116 if (pcbddc->use_change_of_basis) { 4117 /* dual and primal dofs on a single cc */ 4118 PetscInt dual_dofs,primal_dofs; 4119 /* working stuff for GEQRF */ 4120 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4121 PetscBLASInt lqr_work; 4122 /* working stuff for UNGQR */ 4123 PetscScalar *gqr_work,lgqr_work_t; 4124 PetscBLASInt lgqr_work; 4125 /* working stuff for TRTRS */ 4126 PetscScalar *trs_rhs; 4127 PetscBLASInt Blas_NRHS; 4128 /* pointers for values insertion into change of basis matrix */ 4129 PetscInt *start_rows,*start_cols; 4130 PetscScalar *start_vals; 4131 /* working stuff for values insertion */ 4132 PetscBT is_primal; 4133 PetscInt *aux_primal_numbering_B; 4134 /* matrix sizes */ 4135 PetscInt global_size,local_size; 4136 /* temporary change of basis */ 4137 Mat localChangeOfBasisMatrix; 4138 /* extra space for debugging */ 4139 PetscScalar *dbg_work; 4140 4141 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4142 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4143 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4144 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4145 /* nonzeros for local mat */ 4146 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4147 if (!pcbddc->benign_change || pcbddc->fake_change) { 4148 for (i=0;i<pcis->n;i++) nnz[i]=1; 4149 } else { 4150 const PetscInt *ii; 4151 PetscInt n; 4152 PetscBool flg_row; 4153 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4154 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4155 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4156 } 4157 for (i=n_vertices;i<total_counts_cc;i++) { 4158 if (PetscBTLookup(change_basis,i)) { 4159 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4160 if (PetscBTLookup(qr_needed_idx,i)) { 4161 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4162 } else { 4163 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4164 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4165 } 4166 } 4167 } 4168 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4169 ierr = PetscFree(nnz);CHKERRQ(ierr); 4170 /* Set interior change in the matrix */ 4171 if (!pcbddc->benign_change || pcbddc->fake_change) { 4172 for (i=0;i<pcis->n;i++) { 4173 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4174 } 4175 } else { 4176 const PetscInt *ii,*jj; 4177 PetscScalar *aa; 4178 PetscInt n; 4179 PetscBool flg_row; 4180 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4181 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4182 for (i=0;i<n;i++) { 4183 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4184 } 4185 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4186 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4187 } 4188 4189 if (pcbddc->dbg_flag) { 4190 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4191 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4192 } 4193 4194 4195 /* Now we loop on the constraints which need a change of basis */ 4196 /* 4197 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4198 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4199 4200 Basic blocks of change of basis matrix T computed by 4201 4202 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4203 4204 | 1 0 ... 0 s_1/S | 4205 | 0 1 ... 0 s_2/S | 4206 | ... | 4207 | 0 ... 1 s_{n-1}/S | 4208 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4209 4210 with S = \sum_{i=1}^n s_i^2 4211 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4212 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4213 4214 - QR decomposition of constraints otherwise 4215 */ 4216 if (qr_needed) { 4217 /* space to store Q */ 4218 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4219 /* first we issue queries for optimal work */ 4220 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4221 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4222 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4223 lqr_work = -1; 4224 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4225 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4226 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4227 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4228 lgqr_work = -1; 4229 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4230 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4231 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4232 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4233 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4234 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4235 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4236 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4237 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4238 /* array to store scaling factors for reflectors */ 4239 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4240 /* array to store rhs and solution of triangular solver */ 4241 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4242 /* allocating workspace for check */ 4243 if (pcbddc->dbg_flag) { 4244 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4245 } 4246 } 4247 /* array to store whether a node is primal or not */ 4248 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4249 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4250 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4251 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4252 for (i=0;i<total_primal_vertices;i++) { 4253 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4254 } 4255 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4256 4257 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4258 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4259 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4260 if (PetscBTLookup(change_basis,total_counts)) { 4261 /* get constraint info */ 4262 primal_dofs = constraints_n[total_counts]; 4263 dual_dofs = size_of_constraint-primal_dofs; 4264 4265 if (pcbddc->dbg_flag) { 4266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4267 } 4268 4269 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4270 4271 /* copy quadrature constraints for change of basis check */ 4272 if (pcbddc->dbg_flag) { 4273 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4274 } 4275 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4276 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4277 4278 /* compute QR decomposition of constraints */ 4279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4280 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4282 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4283 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4284 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4286 4287 /* explictly compute R^-T */ 4288 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4289 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4290 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4291 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4292 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4293 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4294 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4295 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4296 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4297 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4298 4299 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4300 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4302 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4303 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4304 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4305 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4306 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4307 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4308 4309 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4310 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4311 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4313 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4314 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4315 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4316 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4318 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4319 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4321 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4322 4323 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4324 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4325 /* insert cols for primal dofs */ 4326 for (j=0;j<primal_dofs;j++) { 4327 start_vals = &qr_basis[j*size_of_constraint]; 4328 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4329 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4330 } 4331 /* insert cols for dual dofs */ 4332 for (j=0,k=0;j<dual_dofs;k++) { 4333 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4334 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4335 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4336 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4337 j++; 4338 } 4339 } 4340 4341 /* check change of basis */ 4342 if (pcbddc->dbg_flag) { 4343 PetscInt ii,jj; 4344 PetscBool valid_qr=PETSC_TRUE; 4345 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4350 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4351 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4352 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4354 for (jj=0;jj<size_of_constraint;jj++) { 4355 for (ii=0;ii<primal_dofs;ii++) { 4356 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4357 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4358 } 4359 } 4360 if (!valid_qr) { 4361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4362 for (jj=0;jj<size_of_constraint;jj++) { 4363 for (ii=0;ii<primal_dofs;ii++) { 4364 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4365 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4366 } 4367 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4368 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4369 } 4370 } 4371 } 4372 } else { 4373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4374 } 4375 } 4376 } else { /* simple transformation block */ 4377 PetscInt row,col; 4378 PetscScalar val,norm; 4379 4380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4381 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4382 for (j=0;j<size_of_constraint;j++) { 4383 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4384 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4385 if (!PetscBTLookup(is_primal,row_B)) { 4386 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4387 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4388 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4389 } else { 4390 for (k=0;k<size_of_constraint;k++) { 4391 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4392 if (row != col) { 4393 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4394 } else { 4395 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4396 } 4397 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4398 } 4399 } 4400 } 4401 if (pcbddc->dbg_flag) { 4402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4403 } 4404 } 4405 } else { 4406 if (pcbddc->dbg_flag) { 4407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4408 } 4409 } 4410 } 4411 4412 /* free workspace */ 4413 if (qr_needed) { 4414 if (pcbddc->dbg_flag) { 4415 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4416 } 4417 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4418 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4419 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4420 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4421 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4422 } 4423 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4424 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4425 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4426 4427 /* assembling of global change of variable */ 4428 if (!pcbddc->fake_change) { 4429 Mat tmat; 4430 PetscInt bs; 4431 4432 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4433 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4434 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4435 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4436 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4437 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4438 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4439 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4440 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4441 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4442 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4443 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4444 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4445 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4446 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4447 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4448 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4449 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4450 4451 /* check */ 4452 if (pcbddc->dbg_flag) { 4453 PetscReal error; 4454 Vec x,x_change; 4455 4456 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4457 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4458 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4459 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4460 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4461 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4462 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4463 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4464 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4465 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4466 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4467 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4470 ierr = VecDestroy(&x);CHKERRQ(ierr); 4471 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4472 } 4473 /* adapt sub_schurs computed (if any) */ 4474 if (pcbddc->use_deluxe_scaling) { 4475 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4476 4477 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4479 } 4480 if (sub_schurs && sub_schurs->S_Ej_all) { 4481 Mat S_new,tmat; 4482 IS is_all_N,is_V_Sall = NULL; 4483 4484 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4485 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4486 if (pcbddc->deluxe_zerorows) { 4487 ISLocalToGlobalMapping NtoSall; 4488 IS is_V; 4489 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4490 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4491 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4492 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4493 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4494 } 4495 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4496 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4497 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4498 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4499 if (pcbddc->deluxe_zerorows) { 4500 const PetscScalar *array; 4501 const PetscInt *idxs_V,*idxs_all; 4502 PetscInt i,n_V; 4503 4504 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4505 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4506 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4507 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4508 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4509 for (i=0;i<n_V;i++) { 4510 PetscScalar val; 4511 PetscInt idx; 4512 4513 idx = idxs_V[i]; 4514 val = array[idxs_all[idxs_V[i]]]; 4515 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4516 } 4517 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4520 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4521 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4522 } 4523 sub_schurs->S_Ej_all = S_new; 4524 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4525 if (sub_schurs->sum_S_Ej_all) { 4526 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4527 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4528 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4529 if (pcbddc->deluxe_zerorows) { 4530 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4531 } 4532 sub_schurs->sum_S_Ej_all = S_new; 4533 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4534 } 4535 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4536 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4537 } 4538 /* destroy any change of basis context in sub_schurs */ 4539 if (sub_schurs && sub_schurs->change) { 4540 PetscInt i; 4541 4542 for (i=0;i<sub_schurs->n_subs;i++) { 4543 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4544 } 4545 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4546 } 4547 } 4548 if (pcbddc->switch_static) { /* need to save the local change */ 4549 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4550 } else { 4551 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4552 } 4553 /* determine if any process has changed the pressures locally */ 4554 pcbddc->change_interior = pcbddc->benign_have_null; 4555 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4556 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4557 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4558 pcbddc->use_qr_single = qr_needed; 4559 } 4560 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4561 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4562 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4563 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4564 } else { 4565 Mat benign_global = NULL; 4566 if (pcbddc->benign_have_null) { 4567 Mat tmat; 4568 4569 pcbddc->change_interior = PETSC_TRUE; 4570 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4571 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4572 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4573 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4574 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4575 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4576 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4577 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4578 if (pcbddc->benign_change) { 4579 Mat M; 4580 4581 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4582 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4583 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4584 ierr = MatDestroy(&M);CHKERRQ(ierr); 4585 } else { 4586 Mat eye; 4587 PetscScalar *array; 4588 4589 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4590 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4591 for (i=0;i<pcis->n;i++) { 4592 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4593 } 4594 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4595 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4598 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4599 } 4600 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4601 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4602 } 4603 if (pcbddc->user_ChangeOfBasisMatrix) { 4604 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4605 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4606 } else if (pcbddc->benign_have_null) { 4607 pcbddc->ChangeOfBasisMatrix = benign_global; 4608 } 4609 } 4610 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4611 IS is_global; 4612 const PetscInt *gidxs; 4613 4614 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4615 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4616 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4617 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4618 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4619 } 4620 } 4621 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4622 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4623 } 4624 4625 if (!pcbddc->fake_change) { 4626 /* add pressure dofs to set of primal nodes for numbering purposes */ 4627 for (i=0;i<pcbddc->benign_n;i++) { 4628 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4629 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4630 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4631 pcbddc->local_primal_size_cc++; 4632 pcbddc->local_primal_size++; 4633 } 4634 4635 /* check if a new primal space has been introduced (also take into account benign trick) */ 4636 pcbddc->new_primal_space_local = PETSC_TRUE; 4637 if (olocal_primal_size == pcbddc->local_primal_size) { 4638 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4639 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4640 if (!pcbddc->new_primal_space_local) { 4641 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4642 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4643 } 4644 } 4645 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4646 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4647 } 4648 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4649 4650 /* flush dbg viewer */ 4651 if (pcbddc->dbg_flag) { 4652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4653 } 4654 4655 /* free workspace */ 4656 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4657 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4658 if (!pcbddc->adaptive_selection) { 4659 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4660 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4661 } else { 4662 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4663 pcbddc->adaptive_constraints_idxs_ptr, 4664 pcbddc->adaptive_constraints_data_ptr, 4665 pcbddc->adaptive_constraints_idxs, 4666 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4667 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4668 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4669 } 4670 PetscFunctionReturn(0); 4671 } 4672 4673 #undef __FUNCT__ 4674 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4675 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4676 { 4677 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4678 PC_IS *pcis = (PC_IS*)pc->data; 4679 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4680 PetscInt ierr,i,N; 4681 4682 PetscFunctionBegin; 4683 /* Reset previously computed graph */ 4684 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4685 /* Init local Graph struct */ 4686 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4687 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4688 4689 /* Check validity of the csr graph passed in by the user */ 4690 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4691 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4692 } 4693 4694 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4695 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4696 PetscInt *xadj,*adjncy; 4697 PetscInt nvtxs; 4698 PetscBool flg_row=PETSC_FALSE; 4699 4700 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4701 if (flg_row) { 4702 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4703 pcbddc->computed_rowadj = PETSC_TRUE; 4704 } 4705 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4706 } 4707 if (pcbddc->dbg_flag) { 4708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4709 } 4710 4711 /* Setup of Graph */ 4712 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4713 4714 /* attach info on disconnected subdomains if present */ 4715 if (pcbddc->n_local_subs) { 4716 PetscInt *local_subs; 4717 4718 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4719 for (i=0;i<pcbddc->n_local_subs;i++) { 4720 const PetscInt *idxs; 4721 PetscInt nl,j; 4722 4723 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4724 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4725 for (j=0;j<nl;j++) { 4726 local_subs[idxs[j]] = i; 4727 } 4728 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4729 } 4730 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4731 pcbddc->mat_graph->local_subs = local_subs; 4732 } 4733 4734 /* Graph's connected components analysis */ 4735 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4736 PetscFunctionReturn(0); 4737 } 4738 4739 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4740 #undef __FUNCT__ 4741 #define __FUNCT__ "PCBDDCSubsetNumbering" 4742 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4743 { 4744 PetscSF sf; 4745 PetscLayout map; 4746 const PetscInt *idxs; 4747 PetscInt *leaf_data,*root_data,*gidxs; 4748 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4749 PetscInt n_n,nlocals,start,first_index; 4750 PetscMPIInt commsize; 4751 PetscBool first_found; 4752 PetscErrorCode ierr; 4753 4754 PetscFunctionBegin; 4755 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4756 if (subset_mult) { 4757 PetscCheckSameComm(subset,1,subset_mult,2); 4758 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4759 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4760 } 4761 /* create workspace layout for computing global indices of subset */ 4762 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4763 lbounds[0] = lbounds[1] = 0; 4764 for (i=0;i<n;i++) { 4765 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4766 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4767 } 4768 lbounds[0] = -lbounds[0]; 4769 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4770 gbounds[0] = -gbounds[0]; 4771 N = gbounds[1] - gbounds[0] + 1; 4772 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4773 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4774 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4775 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4776 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4777 4778 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4779 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4780 if (subset_mult) { 4781 const PetscInt* idxs_mult; 4782 4783 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4784 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4785 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4786 } else { 4787 for (i=0;i<n;i++) leaf_data[i] = 1; 4788 } 4789 /* local size of new subset */ 4790 n_n = 0; 4791 for (i=0;i<n;i++) n_n += leaf_data[i]; 4792 4793 /* global indexes in layout */ 4794 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4795 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4796 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4798 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4799 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4800 4801 /* reduce from leaves to roots */ 4802 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4803 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4804 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4805 4806 /* count indexes in local part of layout */ 4807 nlocals = 0; 4808 first_index = -1; 4809 first_found = PETSC_FALSE; 4810 for (i=0;i<Nl;i++) { 4811 if (!first_found && root_data[i]) { 4812 first_found = PETSC_TRUE; 4813 first_index = i; 4814 } 4815 nlocals += root_data[i]; 4816 } 4817 4818 /* cumulative of number of indexes and size of subset without holes */ 4819 #if defined(PETSC_HAVE_MPI_EXSCAN) 4820 start = 0; 4821 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4822 #else 4823 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4824 start = start-nlocals; 4825 #endif 4826 4827 if (N_n) { /* compute total size of new subset if requested */ 4828 *N_n = start + nlocals; 4829 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4830 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4831 } 4832 4833 /* adapt root data with cumulative */ 4834 if (first_found) { 4835 PetscInt old_index; 4836 4837 root_data[first_index] += start; 4838 old_index = first_index; 4839 for (i=first_index+1;i<Nl;i++) { 4840 if (root_data[i]) { 4841 root_data[i] += root_data[old_index]; 4842 old_index = i; 4843 } 4844 } 4845 } 4846 4847 /* from roots to leaves */ 4848 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4849 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4850 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4851 4852 /* create new IS with global indexes without holes */ 4853 if (subset_mult) { 4854 const PetscInt* idxs_mult; 4855 PetscInt cum; 4856 4857 cum = 0; 4858 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4859 for (i=0;i<n;i++) { 4860 PetscInt j; 4861 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4862 } 4863 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4864 } else { 4865 for (i=0;i<n;i++) { 4866 gidxs[i] = leaf_data[i]-1; 4867 } 4868 } 4869 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4870 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 #undef __FUNCT__ 4875 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4876 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4877 { 4878 PetscInt i,j; 4879 PetscScalar *alphas; 4880 PetscErrorCode ierr; 4881 4882 PetscFunctionBegin; 4883 /* this implements stabilized Gram-Schmidt */ 4884 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4885 for (i=0;i<n;i++) { 4886 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4887 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4888 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4889 } 4890 ierr = PetscFree(alphas);CHKERRQ(ierr); 4891 PetscFunctionReturn(0); 4892 } 4893 4894 #undef __FUNCT__ 4895 #define __FUNCT__ "MatISGetSubassemblingPattern" 4896 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4897 { 4898 Mat A; 4899 PetscInt n_neighs,*neighs,*n_shared,**shared; 4900 PetscMPIInt size,rank,color; 4901 PetscInt *xadj,*adjncy; 4902 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4903 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4904 PetscInt void_procs,*procs_candidates = NULL; 4905 PetscInt xadj_count, *count; 4906 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4907 PetscSubcomm psubcomm; 4908 MPI_Comm subcomm; 4909 PetscErrorCode ierr; 4910 4911 PetscFunctionBegin; 4912 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4913 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4914 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4915 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4916 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4917 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4918 4919 if (have_void) *have_void = PETSC_FALSE; 4920 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4921 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4922 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4923 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4924 im_active = !!(n); 4925 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4926 void_procs = size - active_procs; 4927 /* get ranks of of non-active processes in mat communicator */ 4928 if (void_procs) { 4929 PetscInt ncand; 4930 4931 if (have_void) *have_void = PETSC_TRUE; 4932 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4933 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4934 for (i=0,ncand=0;i<size;i++) { 4935 if (!procs_candidates[i]) { 4936 procs_candidates[ncand++] = i; 4937 } 4938 } 4939 /* force n_subdomains to be not greater that the number of non-active processes */ 4940 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4941 } 4942 4943 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4944 if (active_procs < *n_subdomains) { 4945 PetscInt issize,isidx; 4946 if (im_active) { 4947 issize = 1; 4948 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4949 isidx = procs_candidates[rank]; 4950 } else { 4951 isidx = rank; 4952 } 4953 } else { 4954 issize = 0; 4955 isidx = -1; 4956 } 4957 *n_subdomains = active_procs; 4958 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4959 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4960 PetscFunctionReturn(0); 4961 } 4962 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4963 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4964 threshold = PetscMax(threshold,2); 4965 4966 /* Get info on mapping */ 4967 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4968 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4969 4970 /* build local CSR graph of subdomains' connectivity */ 4971 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4972 xadj[0] = 0; 4973 xadj[1] = PetscMax(n_neighs-1,0); 4974 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4975 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4976 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4977 for (i=1;i<n_neighs;i++) 4978 for (j=0;j<n_shared[i];j++) 4979 count[shared[i][j]] += 1; 4980 4981 xadj_count = 0; 4982 for (i=1;i<n_neighs;i++) { 4983 for (j=0;j<n_shared[i];j++) { 4984 if (count[shared[i][j]] < threshold) { 4985 adjncy[xadj_count] = neighs[i]; 4986 adjncy_wgt[xadj_count] = n_shared[i]; 4987 xadj_count++; 4988 break; 4989 } 4990 } 4991 } 4992 xadj[1] = xadj_count; 4993 ierr = PetscFree(count);CHKERRQ(ierr); 4994 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4995 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4996 4997 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4998 4999 /* Restrict work on active processes only */ 5000 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5001 if (void_procs) { 5002 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5003 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5004 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5005 subcomm = PetscSubcommChild(psubcomm); 5006 } else { 5007 psubcomm = NULL; 5008 subcomm = PetscObjectComm((PetscObject)mat); 5009 } 5010 5011 v_wgt = NULL; 5012 if (!color) { 5013 ierr = PetscFree(xadj);CHKERRQ(ierr); 5014 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5015 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5016 } else { 5017 Mat subdomain_adj; 5018 IS new_ranks,new_ranks_contig; 5019 MatPartitioning partitioner; 5020 PetscInt rstart=0,rend=0; 5021 PetscInt *is_indices,*oldranks; 5022 PetscMPIInt size; 5023 PetscBool aggregate; 5024 5025 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5026 if (void_procs) { 5027 PetscInt prank = rank; 5028 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5029 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5030 for (i=0;i<xadj[1];i++) { 5031 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5032 } 5033 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5034 } else { 5035 oldranks = NULL; 5036 } 5037 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5038 if (aggregate) { /* TODO: all this part could be made more efficient */ 5039 PetscInt lrows,row,ncols,*cols; 5040 PetscMPIInt nrank; 5041 PetscScalar *vals; 5042 5043 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5044 lrows = 0; 5045 if (nrank<redprocs) { 5046 lrows = size/redprocs; 5047 if (nrank<size%redprocs) lrows++; 5048 } 5049 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5050 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5051 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5052 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5053 row = nrank; 5054 ncols = xadj[1]-xadj[0]; 5055 cols = adjncy; 5056 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5057 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5058 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5059 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5060 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5061 ierr = PetscFree(xadj);CHKERRQ(ierr); 5062 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5063 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5064 ierr = PetscFree(vals);CHKERRQ(ierr); 5065 if (use_vwgt) { 5066 Vec v; 5067 const PetscScalar *array; 5068 PetscInt nl; 5069 5070 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5071 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5072 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5073 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5074 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5075 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5076 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5077 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5078 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5079 ierr = VecDestroy(&v);CHKERRQ(ierr); 5080 } 5081 } else { 5082 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5083 if (use_vwgt) { 5084 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5085 v_wgt[0] = local_size; 5086 } 5087 } 5088 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5089 5090 /* Partition */ 5091 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5092 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5093 if (v_wgt) { 5094 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5095 } 5096 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5097 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5098 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5099 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5100 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5101 5102 /* renumber new_ranks to avoid "holes" in new set of processors */ 5103 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5104 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5105 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5106 if (!aggregate) { 5107 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5108 #if defined(PETSC_USE_DEBUG) 5109 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5110 #endif 5111 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5112 } else if (oldranks) { 5113 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5114 } else { 5115 ranks_send_to_idx[0] = is_indices[0]; 5116 } 5117 } else { 5118 PetscInt idxs[1]; 5119 PetscMPIInt tag; 5120 MPI_Request *reqs; 5121 5122 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5123 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5124 for (i=rstart;i<rend;i++) { 5125 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5126 } 5127 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5128 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5129 ierr = PetscFree(reqs);CHKERRQ(ierr); 5130 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5131 #if defined(PETSC_USE_DEBUG) 5132 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5133 #endif 5134 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5135 } else if (oldranks) { 5136 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5137 } else { 5138 ranks_send_to_idx[0] = idxs[0]; 5139 } 5140 } 5141 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5142 /* clean up */ 5143 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5144 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5145 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5146 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5147 } 5148 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5149 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5150 5151 /* assemble parallel IS for sends */ 5152 i = 1; 5153 if (!color) i=0; 5154 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5155 PetscFunctionReturn(0); 5156 } 5157 5158 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5159 5160 #undef __FUNCT__ 5161 #define __FUNCT__ "MatISSubassemble" 5162 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5163 { 5164 Mat local_mat; 5165 IS is_sends_internal; 5166 PetscInt rows,cols,new_local_rows; 5167 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5168 PetscBool ismatis,isdense,newisdense,destroy_mat; 5169 ISLocalToGlobalMapping l2gmap; 5170 PetscInt* l2gmap_indices; 5171 const PetscInt* is_indices; 5172 MatType new_local_type; 5173 /* buffers */ 5174 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5175 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5176 PetscInt *recv_buffer_idxs_local; 5177 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5178 /* MPI */ 5179 MPI_Comm comm,comm_n; 5180 PetscSubcomm subcomm; 5181 PetscMPIInt n_sends,n_recvs,commsize; 5182 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5183 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5184 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5185 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5186 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5187 PetscErrorCode ierr; 5188 5189 PetscFunctionBegin; 5190 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5191 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5192 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5193 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5194 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5195 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5196 PetscValidLogicalCollectiveBool(mat,reuse,6); 5197 PetscValidLogicalCollectiveInt(mat,nis,8); 5198 5199 /* further checks */ 5200 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5201 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5202 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5203 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5204 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5205 if (reuse && *mat_n) { 5206 PetscInt mrows,mcols,mnrows,mncols; 5207 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5208 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5209 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5210 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5211 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5212 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5213 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5214 } 5215 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5216 PetscValidLogicalCollectiveInt(mat,bs,0); 5217 5218 /* prepare IS for sending if not provided */ 5219 if (!is_sends) { 5220 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5221 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5222 } else { 5223 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5224 is_sends_internal = is_sends; 5225 } 5226 5227 /* get comm */ 5228 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5229 5230 /* compute number of sends */ 5231 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5232 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5233 5234 /* compute number of receives */ 5235 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5236 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5237 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5238 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5239 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5240 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5241 ierr = PetscFree(iflags);CHKERRQ(ierr); 5242 5243 /* restrict comm if requested */ 5244 subcomm = 0; 5245 destroy_mat = PETSC_FALSE; 5246 if (restrict_comm) { 5247 PetscMPIInt color,subcommsize; 5248 5249 color = 0; 5250 if (restrict_full) { 5251 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5252 } else { 5253 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5254 } 5255 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5256 subcommsize = commsize - subcommsize; 5257 /* check if reuse has been requested */ 5258 if (reuse) { 5259 if (*mat_n) { 5260 PetscMPIInt subcommsize2; 5261 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5262 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5263 comm_n = PetscObjectComm((PetscObject)*mat_n); 5264 } else { 5265 comm_n = PETSC_COMM_SELF; 5266 } 5267 } else { /* MAT_INITIAL_MATRIX */ 5268 PetscMPIInt rank; 5269 5270 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5271 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5272 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5273 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5274 comm_n = PetscSubcommChild(subcomm); 5275 } 5276 /* flag to destroy *mat_n if not significative */ 5277 if (color) destroy_mat = PETSC_TRUE; 5278 } else { 5279 comm_n = comm; 5280 } 5281 5282 /* prepare send/receive buffers */ 5283 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5284 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5285 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5286 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5287 if (nis) { 5288 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5289 } 5290 5291 /* Get data from local matrices */ 5292 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5293 /* TODO: See below some guidelines on how to prepare the local buffers */ 5294 /* 5295 send_buffer_vals should contain the raw values of the local matrix 5296 send_buffer_idxs should contain: 5297 - MatType_PRIVATE type 5298 - PetscInt size_of_l2gmap 5299 - PetscInt global_row_indices[size_of_l2gmap] 5300 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5301 */ 5302 else { 5303 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5304 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5305 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5306 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5307 send_buffer_idxs[1] = i; 5308 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5309 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5310 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5311 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5312 for (i=0;i<n_sends;i++) { 5313 ilengths_vals[is_indices[i]] = len*len; 5314 ilengths_idxs[is_indices[i]] = len+2; 5315 } 5316 } 5317 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5318 /* additional is (if any) */ 5319 if (nis) { 5320 PetscMPIInt psum; 5321 PetscInt j; 5322 for (j=0,psum=0;j<nis;j++) { 5323 PetscInt plen; 5324 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5325 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5326 psum += len+1; /* indices + lenght */ 5327 } 5328 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5329 for (j=0,psum=0;j<nis;j++) { 5330 PetscInt plen; 5331 const PetscInt *is_array_idxs; 5332 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5333 send_buffer_idxs_is[psum] = plen; 5334 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5335 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5336 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5337 psum += plen+1; /* indices + lenght */ 5338 } 5339 for (i=0;i<n_sends;i++) { 5340 ilengths_idxs_is[is_indices[i]] = psum; 5341 } 5342 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5343 } 5344 5345 buf_size_idxs = 0; 5346 buf_size_vals = 0; 5347 buf_size_idxs_is = 0; 5348 for (i=0;i<n_recvs;i++) { 5349 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5350 buf_size_vals += (PetscInt)olengths_vals[i]; 5351 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5352 } 5353 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5354 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5355 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5356 5357 /* get new tags for clean communications */ 5358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5360 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5361 5362 /* allocate for requests */ 5363 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5364 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5365 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5366 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5367 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5368 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5369 5370 /* communications */ 5371 ptr_idxs = recv_buffer_idxs; 5372 ptr_vals = recv_buffer_vals; 5373 ptr_idxs_is = recv_buffer_idxs_is; 5374 for (i=0;i<n_recvs;i++) { 5375 source_dest = onodes[i]; 5376 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5377 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5378 ptr_idxs += olengths_idxs[i]; 5379 ptr_vals += olengths_vals[i]; 5380 if (nis) { 5381 source_dest = onodes_is[i]; 5382 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5383 ptr_idxs_is += olengths_idxs_is[i]; 5384 } 5385 } 5386 for (i=0;i<n_sends;i++) { 5387 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5388 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5389 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5390 if (nis) { 5391 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5392 } 5393 } 5394 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5395 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5396 5397 /* assemble new l2g map */ 5398 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5399 ptr_idxs = recv_buffer_idxs; 5400 new_local_rows = 0; 5401 for (i=0;i<n_recvs;i++) { 5402 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5403 ptr_idxs += olengths_idxs[i]; 5404 } 5405 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5406 ptr_idxs = recv_buffer_idxs; 5407 new_local_rows = 0; 5408 for (i=0;i<n_recvs;i++) { 5409 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5410 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5411 ptr_idxs += olengths_idxs[i]; 5412 } 5413 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5414 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5415 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5416 5417 /* infer new local matrix type from received local matrices type */ 5418 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5419 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5420 if (n_recvs) { 5421 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5422 ptr_idxs = recv_buffer_idxs; 5423 for (i=0;i<n_recvs;i++) { 5424 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5425 new_local_type_private = MATAIJ_PRIVATE; 5426 break; 5427 } 5428 ptr_idxs += olengths_idxs[i]; 5429 } 5430 switch (new_local_type_private) { 5431 case MATDENSE_PRIVATE: 5432 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5433 new_local_type = MATSEQAIJ; 5434 bs = 1; 5435 } else { /* if I receive only 1 dense matrix */ 5436 new_local_type = MATSEQDENSE; 5437 bs = 1; 5438 } 5439 break; 5440 case MATAIJ_PRIVATE: 5441 new_local_type = MATSEQAIJ; 5442 bs = 1; 5443 break; 5444 case MATBAIJ_PRIVATE: 5445 new_local_type = MATSEQBAIJ; 5446 break; 5447 case MATSBAIJ_PRIVATE: 5448 new_local_type = MATSEQSBAIJ; 5449 break; 5450 default: 5451 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5452 break; 5453 } 5454 } else { /* by default, new_local_type is seqdense */ 5455 new_local_type = MATSEQDENSE; 5456 bs = 1; 5457 } 5458 5459 /* create MATIS object if needed */ 5460 if (!reuse) { 5461 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5462 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5463 } else { 5464 /* it also destroys the local matrices */ 5465 if (*mat_n) { 5466 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5467 } else { /* this is a fake object */ 5468 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5469 } 5470 } 5471 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5472 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5473 5474 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5475 5476 /* Global to local map of received indices */ 5477 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5478 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5479 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5480 5481 /* restore attributes -> type of incoming data and its size */ 5482 buf_size_idxs = 0; 5483 for (i=0;i<n_recvs;i++) { 5484 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5485 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5486 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5487 } 5488 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5489 5490 /* set preallocation */ 5491 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5492 if (!newisdense) { 5493 PetscInt *new_local_nnz=0; 5494 5495 ptr_idxs = recv_buffer_idxs_local; 5496 if (n_recvs) { 5497 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5498 } 5499 for (i=0;i<n_recvs;i++) { 5500 PetscInt j; 5501 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5502 for (j=0;j<*(ptr_idxs+1);j++) { 5503 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5504 } 5505 } else { 5506 /* TODO */ 5507 } 5508 ptr_idxs += olengths_idxs[i]; 5509 } 5510 if (new_local_nnz) { 5511 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5512 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5513 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5514 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5515 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5516 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5517 } else { 5518 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5519 } 5520 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5521 } else { 5522 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5523 } 5524 5525 /* set values */ 5526 ptr_vals = recv_buffer_vals; 5527 ptr_idxs = recv_buffer_idxs_local; 5528 for (i=0;i<n_recvs;i++) { 5529 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5530 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5531 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5532 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5534 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5535 } else { 5536 /* TODO */ 5537 } 5538 ptr_idxs += olengths_idxs[i]; 5539 ptr_vals += olengths_vals[i]; 5540 } 5541 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5542 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5543 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5544 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5545 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5546 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5547 5548 #if 0 5549 if (!restrict_comm) { /* check */ 5550 Vec lvec,rvec; 5551 PetscReal infty_error; 5552 5553 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5554 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5555 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5556 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5557 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5558 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5559 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5560 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5561 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5562 } 5563 #endif 5564 5565 /* assemble new additional is (if any) */ 5566 if (nis) { 5567 PetscInt **temp_idxs,*count_is,j,psum; 5568 5569 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5570 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5571 ptr_idxs = recv_buffer_idxs_is; 5572 psum = 0; 5573 for (i=0;i<n_recvs;i++) { 5574 for (j=0;j<nis;j++) { 5575 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5576 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5577 psum += plen; 5578 ptr_idxs += plen+1; /* shift pointer to received data */ 5579 } 5580 } 5581 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5582 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5583 for (i=1;i<nis;i++) { 5584 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5585 } 5586 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5587 ptr_idxs = recv_buffer_idxs_is; 5588 for (i=0;i<n_recvs;i++) { 5589 for (j=0;j<nis;j++) { 5590 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5591 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5592 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5593 ptr_idxs += plen+1; /* shift pointer to received data */ 5594 } 5595 } 5596 for (i=0;i<nis;i++) { 5597 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5598 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5599 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5600 } 5601 ierr = PetscFree(count_is);CHKERRQ(ierr); 5602 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5603 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5604 } 5605 /* free workspace */ 5606 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5607 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5608 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5609 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5610 if (isdense) { 5611 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5612 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5613 } else { 5614 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5615 } 5616 if (nis) { 5617 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5618 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5619 } 5620 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5621 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5622 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5623 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5624 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5625 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5626 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5627 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5628 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5629 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5630 ierr = PetscFree(onodes);CHKERRQ(ierr); 5631 if (nis) { 5632 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5633 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5634 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5635 } 5636 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5637 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5638 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5639 for (i=0;i<nis;i++) { 5640 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5641 } 5642 *mat_n = NULL; 5643 } 5644 PetscFunctionReturn(0); 5645 } 5646 5647 /* temporary hack into ksp private data structure */ 5648 #include <petsc/private/kspimpl.h> 5649 5650 #undef __FUNCT__ 5651 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5652 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5653 { 5654 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5655 PC_IS *pcis = (PC_IS*)pc->data; 5656 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5657 MatNullSpace CoarseNullSpace = NULL; 5658 ISLocalToGlobalMapping coarse_islg; 5659 IS coarse_is,*isarray; 5660 PetscInt i,im_active=-1,active_procs=-1; 5661 PetscInt nis,nisdofs,nisneu,nisvert; 5662 PC pc_temp; 5663 PCType coarse_pc_type; 5664 KSPType coarse_ksp_type; 5665 PetscBool multilevel_requested,multilevel_allowed; 5666 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5667 Mat t_coarse_mat_is; 5668 PetscInt ncoarse; 5669 PetscBool compute_vecs = PETSC_FALSE; 5670 PetscScalar *array; 5671 MatReuse coarse_mat_reuse; 5672 PetscBool restr, full_restr, have_void; 5673 PetscErrorCode ierr; 5674 5675 PetscFunctionBegin; 5676 /* Assign global numbering to coarse dofs */ 5677 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5678 PetscInt ocoarse_size; 5679 compute_vecs = PETSC_TRUE; 5680 ocoarse_size = pcbddc->coarse_size; 5681 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5682 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5683 /* see if we can avoid some work */ 5684 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5685 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5686 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5687 PC pc; 5688 PetscBool isbddc; 5689 5690 /* temporary workaround since PCBDDC does not have a reset method so far */ 5691 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5692 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5693 if (isbddc) { 5694 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5695 } else { 5696 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5697 } 5698 coarse_reuse = PETSC_FALSE; 5699 } else { /* we can safely reuse already computed coarse matrix */ 5700 coarse_reuse = PETSC_TRUE; 5701 } 5702 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5703 coarse_reuse = PETSC_FALSE; 5704 } 5705 /* reset any subassembling information */ 5706 if (!coarse_reuse || pcbddc->recompute_topography) { 5707 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5708 } 5709 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5710 coarse_reuse = PETSC_TRUE; 5711 } 5712 /* assemble coarse matrix */ 5713 if (coarse_reuse && pcbddc->coarse_ksp) { 5714 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5715 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5716 coarse_mat_reuse = MAT_REUSE_MATRIX; 5717 } else { 5718 coarse_mat = NULL; 5719 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5720 } 5721 5722 /* creates temporary l2gmap and IS for coarse indexes */ 5723 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5724 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5725 5726 /* creates temporary MATIS object for coarse matrix */ 5727 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5728 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5729 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5730 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5731 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5732 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5733 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5734 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5735 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5736 5737 /* count "active" (i.e. with positive local size) and "void" processes */ 5738 im_active = !!(pcis->n); 5739 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5740 5741 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5742 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5743 /* full_restr : just use the receivers from the subassembling pattern */ 5744 coarse_mat_is = NULL; 5745 multilevel_allowed = PETSC_FALSE; 5746 multilevel_requested = PETSC_FALSE; 5747 full_restr = PETSC_TRUE; 5748 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5749 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5750 if (multilevel_requested) { 5751 ncoarse = active_procs/pcbddc->coarsening_ratio; 5752 restr = PETSC_FALSE; 5753 full_restr = PETSC_FALSE; 5754 } else { 5755 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5756 restr = PETSC_TRUE; 5757 full_restr = PETSC_TRUE; 5758 } 5759 ncoarse = PetscMax(1,ncoarse); 5760 if (!pcbddc->coarse_subassembling) { 5761 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5762 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5763 PetscInt psum; 5764 PetscMPIInt size; 5765 if (pcbddc->coarse_ksp) psum = 1; 5766 else psum = 0; 5767 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5768 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5769 if (ncoarse < size) have_void = PETSC_TRUE; 5770 } 5771 /* determine if we can go multilevel */ 5772 if (multilevel_requested) { 5773 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5774 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5775 } 5776 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5777 5778 /* dump subassembling pattern */ 5779 if (pcbddc->dbg_flag && multilevel_allowed) { 5780 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5781 } 5782 5783 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5784 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5785 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5786 const PetscInt *idxs; 5787 ISLocalToGlobalMapping tmap; 5788 5789 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5790 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5791 /* allocate space for temporary storage */ 5792 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5793 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5794 /* allocate for IS array */ 5795 nisdofs = pcbddc->n_ISForDofsLocal; 5796 nisneu = !!pcbddc->NeumannBoundariesLocal; 5797 nisvert = 0; /* nisvert is not used */ 5798 nis = nisdofs + nisneu + nisvert; 5799 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5800 /* dofs splitting */ 5801 for (i=0;i<nisdofs;i++) { 5802 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5803 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5804 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5805 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5806 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5807 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5808 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5809 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5810 } 5811 /* neumann boundaries */ 5812 if (pcbddc->NeumannBoundariesLocal) { 5813 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5814 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5815 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5816 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5817 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5818 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5819 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5820 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5821 } 5822 /* free memory */ 5823 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5824 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5825 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5826 } else { 5827 nis = 0; 5828 nisdofs = 0; 5829 nisneu = 0; 5830 nisvert = 0; 5831 isarray = NULL; 5832 } 5833 /* destroy no longer needed map */ 5834 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5835 5836 /* subassemble */ 5837 if (multilevel_allowed) { 5838 PetscBool reuse,reuser; 5839 if (coarse_mat) reuse = PETSC_TRUE; 5840 else reuse = PETSC_FALSE; 5841 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5842 if (reuser) { 5843 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5844 } else { 5845 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5846 } 5847 /* TODO: if (pcbddc->benign_have_null) -> give a hint to the coarser levels if they have to locally apply the benign trick or not */ 5848 } else { 5849 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5850 } 5851 if (coarse_mat_is || coarse_mat) { 5852 PetscMPIInt size; 5853 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5854 if (!multilevel_allowed) { 5855 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5856 } else { 5857 Mat A; 5858 5859 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5860 if (coarse_mat_is) { 5861 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5862 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5863 coarse_mat = coarse_mat_is; 5864 } 5865 /* be sure we don't have MatSeqDENSE as local mat */ 5866 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5867 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5868 } 5869 } 5870 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5871 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5872 5873 /* create local to global scatters for coarse problem */ 5874 if (compute_vecs) { 5875 PetscInt lrows; 5876 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5877 if (coarse_mat) { 5878 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5879 } else { 5880 lrows = 0; 5881 } 5882 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5883 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5884 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5885 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5886 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5887 } 5888 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5889 5890 /* set defaults for coarse KSP and PC */ 5891 if (multilevel_allowed) { 5892 coarse_ksp_type = KSPRICHARDSON; 5893 coarse_pc_type = PCBDDC; 5894 } else { 5895 coarse_ksp_type = KSPPREONLY; 5896 coarse_pc_type = PCREDUNDANT; 5897 } 5898 5899 /* print some info if requested */ 5900 if (pcbddc->dbg_flag) { 5901 if (!multilevel_allowed) { 5902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5903 if (multilevel_requested) { 5904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5905 } else if (pcbddc->max_levels) { 5906 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5907 } 5908 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5909 } 5910 } 5911 5912 /* create the coarse KSP object only once with defaults */ 5913 if (coarse_mat) { 5914 PetscViewer dbg_viewer = NULL; 5915 if (pcbddc->dbg_flag) { 5916 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5917 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5918 } 5919 if (!pcbddc->coarse_ksp) { 5920 char prefix[256],str_level[16]; 5921 size_t len; 5922 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5923 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5924 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5925 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5926 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5927 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5928 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5929 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5930 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5931 /* prefix */ 5932 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5933 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5934 if (!pcbddc->current_level) { 5935 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5936 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5937 } else { 5938 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5939 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5940 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5941 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5942 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5943 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5944 } 5945 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5946 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5947 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5948 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5949 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5950 /* allow user customization */ 5951 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5952 } 5953 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5954 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5955 if (nisdofs) { 5956 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5957 for (i=0;i<nisdofs;i++) { 5958 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5959 } 5960 } 5961 if (nisneu) { 5962 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5963 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5964 } 5965 if (nisvert) { 5966 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5967 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5968 } 5969 5970 /* get some info after set from options */ 5971 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5972 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5973 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5974 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5975 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5976 isbddc = PETSC_FALSE; 5977 } 5978 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5979 if (isredundant) { 5980 KSP inner_ksp; 5981 PC inner_pc; 5982 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5983 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5984 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5985 } 5986 5987 /* parameters which miss an API */ 5988 if (isbddc) { 5989 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5990 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5991 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5992 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5993 if (pcbddc_coarse->benign_saddle_point) { 5994 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5995 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5996 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5997 } 5998 } 5999 6000 /* propagate symmetry info of coarse matrix */ 6001 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6002 if (pc->pmat->symmetric_set) { 6003 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6004 } 6005 if (pc->pmat->hermitian_set) { 6006 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6007 } 6008 if (pc->pmat->spd_set) { 6009 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6010 } 6011 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6012 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6013 } 6014 /* set operators */ 6015 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6016 if (pcbddc->dbg_flag) { 6017 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6018 } 6019 } 6020 ierr = PetscFree(isarray);CHKERRQ(ierr); 6021 #if 0 6022 { 6023 PetscViewer viewer; 6024 char filename[256]; 6025 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6026 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6027 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6028 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6029 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6030 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6031 } 6032 #endif 6033 6034 if (pcbddc->coarse_ksp) { 6035 Vec crhs,csol; 6036 6037 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6038 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6039 if (!csol) { 6040 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6041 } 6042 if (!crhs) { 6043 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6044 } 6045 } 6046 6047 /* compute null space for coarse solver if the benign trick has been requested */ 6048 if (pcbddc->benign_null) { 6049 6050 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6051 for (i=0;i<pcbddc->benign_n;i++) { 6052 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6053 } 6054 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6055 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6056 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6057 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6058 if (coarse_mat) { 6059 Vec nullv; 6060 PetscScalar *array,*array2; 6061 PetscInt nl; 6062 6063 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6064 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6065 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6066 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6067 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6068 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6069 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6070 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6071 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6072 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6073 } 6074 } 6075 6076 if (pcbddc->coarse_ksp) { 6077 PetscBool ispreonly; 6078 6079 if (CoarseNullSpace) { 6080 PetscBool isnull; 6081 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6082 if (isnull) { 6083 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6084 } 6085 /* TODO: add local nullspaces (if any) */ 6086 } 6087 /* setup coarse ksp */ 6088 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6089 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6090 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6091 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6092 KSP check_ksp; 6093 KSPType check_ksp_type; 6094 PC check_pc; 6095 Vec check_vec,coarse_vec; 6096 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6097 PetscInt its; 6098 PetscBool compute_eigs; 6099 PetscReal *eigs_r,*eigs_c; 6100 PetscInt neigs; 6101 const char *prefix; 6102 6103 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6104 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6105 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6106 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6107 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6108 /* prevent from setup unneeded object */ 6109 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6110 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6111 if (ispreonly) { 6112 check_ksp_type = KSPPREONLY; 6113 compute_eigs = PETSC_FALSE; 6114 } else { 6115 check_ksp_type = KSPGMRES; 6116 compute_eigs = PETSC_TRUE; 6117 } 6118 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6119 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6120 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6121 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6122 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6123 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6124 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6125 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6126 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6127 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6128 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6129 /* create random vec */ 6130 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6131 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6132 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6133 /* solve coarse problem */ 6134 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6135 /* set eigenvalue estimation if preonly has not been requested */ 6136 if (compute_eigs) { 6137 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6138 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6139 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6140 lambda_max = eigs_r[neigs-1]; 6141 lambda_min = eigs_r[0]; 6142 if (pcbddc->use_coarse_estimates) { 6143 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6144 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6145 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6146 } 6147 } 6148 } 6149 6150 /* check coarse problem residual error */ 6151 if (pcbddc->dbg_flag) { 6152 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6153 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6154 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6155 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6156 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6157 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6158 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6159 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6160 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6161 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6162 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6163 if (CoarseNullSpace) { 6164 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6165 } 6166 if (compute_eigs) { 6167 PetscReal lambda_max_s,lambda_min_s; 6168 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6169 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6170 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6171 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6172 for (i=0;i<neigs;i++) { 6173 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6174 } 6175 } 6176 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6177 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6178 } 6179 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6180 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6181 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6182 if (compute_eigs) { 6183 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6184 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6185 } 6186 } 6187 } 6188 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6189 /* print additional info */ 6190 if (pcbddc->dbg_flag) { 6191 /* waits until all processes reaches this point */ 6192 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6193 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6195 } 6196 6197 /* free memory */ 6198 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6199 PetscFunctionReturn(0); 6200 } 6201 6202 #undef __FUNCT__ 6203 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6204 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6205 { 6206 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6207 PC_IS* pcis = (PC_IS*)pc->data; 6208 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6209 IS subset,subset_mult,subset_n; 6210 PetscInt local_size,coarse_size=0; 6211 PetscInt *local_primal_indices=NULL; 6212 const PetscInt *t_local_primal_indices; 6213 PetscErrorCode ierr; 6214 6215 PetscFunctionBegin; 6216 /* Compute global number of coarse dofs */ 6217 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6218 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6219 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6220 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6221 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6222 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6223 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6224 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6225 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6226 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6227 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6228 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6229 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6230 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6231 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6232 6233 /* check numbering */ 6234 if (pcbddc->dbg_flag) { 6235 PetscScalar coarsesum,*array,*array2; 6236 PetscInt i; 6237 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6238 6239 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6241 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6242 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6243 /* counter */ 6244 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6245 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6246 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6247 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6248 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6249 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6250 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6251 for (i=0;i<pcbddc->local_primal_size;i++) { 6252 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6253 } 6254 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6255 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6256 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6257 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6258 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6259 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6260 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6261 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6262 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6263 for (i=0;i<pcis->n;i++) { 6264 if (array[i] != 0.0 && array[i] != array2[i]) { 6265 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6266 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6267 set_error = PETSC_TRUE; 6268 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6270 } 6271 } 6272 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6273 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6275 for (i=0;i<pcis->n;i++) { 6276 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6277 } 6278 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6279 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6280 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6281 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6282 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6284 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6285 PetscInt *gidxs; 6286 6287 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6288 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6289 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6292 for (i=0;i<pcbddc->local_primal_size;i++) { 6293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6294 } 6295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6296 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6297 } 6298 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6299 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6300 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6301 } 6302 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6303 /* get back data */ 6304 *coarse_size_n = coarse_size; 6305 *local_primal_indices_n = local_primal_indices; 6306 PetscFunctionReturn(0); 6307 } 6308 6309 #undef __FUNCT__ 6310 #define __FUNCT__ "PCBDDCGlobalToLocal" 6311 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6312 { 6313 IS localis_t; 6314 PetscInt i,lsize,*idxs,n; 6315 PetscScalar *vals; 6316 PetscErrorCode ierr; 6317 6318 PetscFunctionBegin; 6319 /* get indices in local ordering exploiting local to global map */ 6320 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6321 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6322 for (i=0;i<lsize;i++) vals[i] = 1.0; 6323 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6324 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6325 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6326 if (idxs) { /* multilevel guard */ 6327 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6328 } 6329 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6330 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6331 ierr = PetscFree(vals);CHKERRQ(ierr); 6332 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6333 /* now compute set in local ordering */ 6334 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6335 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6336 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6337 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6338 for (i=0,lsize=0;i<n;i++) { 6339 if (PetscRealPart(vals[i]) > 0.5) { 6340 lsize++; 6341 } 6342 } 6343 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6344 for (i=0,lsize=0;i<n;i++) { 6345 if (PetscRealPart(vals[i]) > 0.5) { 6346 idxs[lsize++] = i; 6347 } 6348 } 6349 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6350 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6351 *localis = localis_t; 6352 PetscFunctionReturn(0); 6353 } 6354 6355 #undef __FUNCT__ 6356 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6357 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6358 { 6359 PC_IS *pcis=(PC_IS*)pc->data; 6360 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6361 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6362 Mat S_j; 6363 PetscInt *used_xadj,*used_adjncy; 6364 PetscBool free_used_adj; 6365 PetscErrorCode ierr; 6366 6367 PetscFunctionBegin; 6368 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6369 free_used_adj = PETSC_FALSE; 6370 if (pcbddc->sub_schurs_layers == -1) { 6371 used_xadj = NULL; 6372 used_adjncy = NULL; 6373 } else { 6374 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6375 used_xadj = pcbddc->mat_graph->xadj; 6376 used_adjncy = pcbddc->mat_graph->adjncy; 6377 } else if (pcbddc->computed_rowadj) { 6378 used_xadj = pcbddc->mat_graph->xadj; 6379 used_adjncy = pcbddc->mat_graph->adjncy; 6380 } else { 6381 PetscBool flg_row=PETSC_FALSE; 6382 const PetscInt *xadj,*adjncy; 6383 PetscInt nvtxs; 6384 6385 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6386 if (flg_row) { 6387 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6388 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6389 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6390 free_used_adj = PETSC_TRUE; 6391 } else { 6392 pcbddc->sub_schurs_layers = -1; 6393 used_xadj = NULL; 6394 used_adjncy = NULL; 6395 } 6396 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6397 } 6398 } 6399 6400 /* setup sub_schurs data */ 6401 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6402 if (!sub_schurs->schur_explicit) { 6403 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6404 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6405 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6406 } else { 6407 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6408 PetscBool isseqaij,need_change = PETSC_FALSE;; 6409 PetscInt benign_n; 6410 Mat change = NULL; 6411 Vec scaling = NULL; 6412 IS change_primal = NULL; 6413 6414 if (!pcbddc->use_vertices && reuse_solvers) { 6415 PetscInt n_vertices; 6416 6417 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6418 reuse_solvers = (PetscBool)!n_vertices; 6419 } 6420 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6421 if (!isseqaij) { 6422 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6423 if (matis->A == pcbddc->local_mat) { 6424 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6425 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6426 } else { 6427 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6428 } 6429 } 6430 if (!pcbddc->benign_change_explicit) { 6431 benign_n = pcbddc->benign_n; 6432 } else { 6433 benign_n = 0; 6434 } 6435 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6436 We need a global reduction to avoid possible deadlocks. 6437 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6438 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6439 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6440 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6441 need_change = (PetscBool)(!need_change); 6442 } 6443 /* If the user defines additional constraints, we import them here. 6444 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6445 if (need_change) { 6446 PC_IS *pcisf; 6447 PC_BDDC *pcbddcf; 6448 PC pcf; 6449 6450 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6451 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6452 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6453 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6454 /* hacks */ 6455 pcisf = (PC_IS*)pcf->data; 6456 pcisf->is_B_local = pcis->is_B_local; 6457 pcisf->vec1_N = pcis->vec1_N; 6458 pcisf->BtoNmap = pcis->BtoNmap; 6459 pcisf->n = pcis->n; 6460 pcisf->n_B = pcis->n_B; 6461 pcbddcf = (PC_BDDC*)pcf->data; 6462 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6463 pcbddcf->mat_graph = pcbddc->mat_graph; 6464 pcbddcf->use_faces = PETSC_TRUE; 6465 pcbddcf->use_change_of_basis = PETSC_TRUE; 6466 pcbddcf->use_change_on_faces = PETSC_TRUE; 6467 pcbddcf->use_qr_single = PETSC_TRUE; 6468 pcbddcf->fake_change = PETSC_TRUE; 6469 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6470 /* store information on primal vertices and change of basis (in local numbering) */ 6471 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6472 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6473 change = pcbddcf->ConstraintMatrix; 6474 pcbddcf->ConstraintMatrix = NULL; 6475 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6476 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6477 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6478 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6479 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6480 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6481 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6482 pcf->ops->destroy = NULL; 6483 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6484 } 6485 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6486 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6487 ierr = MatDestroy(&change);CHKERRQ(ierr); 6488 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6489 } 6490 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6491 6492 /* free adjacency */ 6493 if (free_used_adj) { 6494 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6495 } 6496 PetscFunctionReturn(0); 6497 } 6498 6499 #undef __FUNCT__ 6500 #define __FUNCT__ "PCBDDCInitSubSchurs" 6501 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6502 { 6503 PC_IS *pcis=(PC_IS*)pc->data; 6504 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6505 PCBDDCGraph graph; 6506 PetscErrorCode ierr; 6507 6508 PetscFunctionBegin; 6509 /* attach interface graph for determining subsets */ 6510 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6511 IS verticesIS,verticescomm; 6512 PetscInt vsize,*idxs; 6513 6514 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6515 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6516 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6517 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6518 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6519 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6520 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6521 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6522 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6523 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6524 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6525 } else { 6526 graph = pcbddc->mat_graph; 6527 } 6528 /* print some info */ 6529 if (pcbddc->dbg_flag) { 6530 IS vertices; 6531 PetscInt nv,nedges,nfaces; 6532 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6533 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6534 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6535 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6536 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6542 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6543 } 6544 6545 /* sub_schurs init */ 6546 if (!pcbddc->sub_schurs) { 6547 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6548 } 6549 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6550 6551 /* free graph struct */ 6552 if (pcbddc->sub_schurs_rebuild) { 6553 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6554 } 6555 PetscFunctionReturn(0); 6556 } 6557 6558 #undef __FUNCT__ 6559 #define __FUNCT__ "PCBDDCCheckOperator" 6560 PetscErrorCode PCBDDCCheckOperator(PC pc) 6561 { 6562 PC_IS *pcis=(PC_IS*)pc->data; 6563 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6564 PetscErrorCode ierr; 6565 6566 PetscFunctionBegin; 6567 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6568 IS zerodiag = NULL; 6569 Mat S_j,B0_B=NULL; 6570 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6571 PetscScalar *p0_check,*array,*array2; 6572 PetscReal norm; 6573 PetscInt i; 6574 6575 /* B0 and B0_B */ 6576 if (zerodiag) { 6577 IS dummy; 6578 6579 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6580 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6581 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6582 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6583 } 6584 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6585 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6586 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6587 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6588 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6589 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6590 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6591 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6592 /* S_j */ 6593 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6594 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6595 6596 /* mimic vector in \widetilde{W}_\Gamma */ 6597 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6598 /* continuous in primal space */ 6599 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6600 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6601 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6602 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6603 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6604 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6605 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6606 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6607 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6608 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6609 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6610 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6611 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6612 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6613 6614 /* assemble rhs for coarse problem */ 6615 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6616 /* local with Schur */ 6617 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6618 if (zerodiag) { 6619 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6620 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6621 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6622 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6623 } 6624 /* sum on primal nodes the local contributions */ 6625 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6626 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6627 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6628 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6629 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6630 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6631 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6632 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6633 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6634 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6635 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6636 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6637 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6638 /* scale primal nodes (BDDC sums contibutions) */ 6639 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6640 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6641 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6642 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6643 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6644 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6645 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6646 /* global: \widetilde{B0}_B w_\Gamma */ 6647 if (zerodiag) { 6648 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6649 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6650 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6651 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6652 } 6653 /* BDDC */ 6654 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6655 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6656 6657 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6658 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6659 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6660 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6661 for (i=0;i<pcbddc->benign_n;i++) { 6662 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6663 } 6664 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6665 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6666 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6667 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6668 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6669 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6670 } 6671 PetscFunctionReturn(0); 6672 } 6673