1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 799 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 800 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 801 if (!pcbddc->current_level) { 802 /* this matrix is actually sparse: the nnz pattern is not known unless we do 2 sweeps of the next loop. 803 Setting nnz=100 should be more than enough for the finest level */ 804 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,100,NULL);CHKERRQ(ierr); 805 } else { 806 PetscInt *nnz; 807 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 808 for (i=0;i<pcbddc->benign_n;i++) { 809 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 810 nnz[i] = n - nnz[i]; 811 } 812 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 813 ierr = PetscFree(nnz);CHKERRQ(ierr); 814 } 815 } 816 817 for (i=0;i<pcbddc->benign_n;i++) { 818 PetscScalar *array; 819 PetscInt *idxs,j,nz,cum; 820 821 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 822 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 823 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 824 for (j=0;j<nz;j++) vals[j] = 1.; 825 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 826 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 827 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 828 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 829 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 830 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 831 cum = 0; 832 for (j=0;j<n;j++) { 833 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 834 vals[cum] = array[j]; 835 idxs_ins[cum] = j; 836 cum++; 837 } 838 } 839 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 840 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 841 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 842 } 843 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 844 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 845 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 846 } 847 } else { /* push */ 848 if (pcbddc->benign_change_explicit) { 849 PetscInt i; 850 851 for (i=0;i<pcbddc->benign_n;i++) { 852 PetscScalar *B0_vals; 853 PetscInt *B0_cols,B0_ncol; 854 855 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 856 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 857 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 858 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 859 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 860 } 861 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 863 } else { 864 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 865 } 866 } 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCAdaptiveSelection" 872 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 873 { 874 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 875 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 876 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 877 PetscBLASInt *B_iwork,*B_ifail; 878 PetscScalar *work,lwork; 879 PetscScalar *St,*S,*eigv; 880 PetscScalar *Sarray,*Starray; 881 PetscReal *eigs,thresh; 882 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 883 PetscBool allocated_S_St; 884 #if defined(PETSC_USE_COMPLEX) 885 PetscReal *rwork; 886 #endif 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 if (!sub_schurs->schur_explicit) { 891 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 892 } 893 894 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 895 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 896 } 897 898 if (pcbddc->dbg_flag) { 899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 900 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 903 } 904 905 if (pcbddc->dbg_flag) { 906 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 907 } 908 909 /* max size of subsets */ 910 mss = 0; 911 for (i=0;i<sub_schurs->n_subs;i++) { 912 PetscInt subset_size; 913 914 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 915 mss = PetscMax(mss,subset_size); 916 } 917 918 /* min/max and threshold */ 919 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 920 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 921 nmax = PetscMax(nmin,nmax); 922 allocated_S_St = PETSC_FALSE; 923 if (nmin) { 924 allocated_S_St = PETSC_TRUE; 925 } 926 927 /* allocate lapack workspace */ 928 cum = cum2 = 0; 929 maxneigs = 0; 930 for (i=0;i<sub_schurs->n_subs;i++) { 931 PetscInt n,subset_size; 932 933 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 934 n = PetscMin(subset_size,nmax); 935 cum += subset_size; 936 cum2 += subset_size*n; 937 maxneigs = PetscMax(maxneigs,n); 938 } 939 if (mss) { 940 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 941 PetscBLASInt B_itype = 1; 942 PetscBLASInt B_N = mss; 943 PetscReal zero = 0.0; 944 PetscReal eps = 0.0; /* dlamch? */ 945 946 B_lwork = -1; 947 S = NULL; 948 St = NULL; 949 eigs = NULL; 950 eigv = NULL; 951 B_iwork = NULL; 952 B_ifail = NULL; 953 #if defined(PETSC_USE_COMPLEX) 954 rwork = NULL; 955 #endif 956 thresh = 1.0; 957 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 958 #if defined(PETSC_USE_COMPLEX) 959 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 960 #else 961 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 962 #endif 963 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 964 ierr = PetscFPTrapPop();CHKERRQ(ierr); 965 } else { 966 /* TODO */ 967 } 968 } else { 969 lwork = 0; 970 } 971 972 nv = 0; 973 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 974 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 975 } 976 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 977 if (allocated_S_St) { 978 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 979 } 980 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 981 #if defined(PETSC_USE_COMPLEX) 982 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 983 #endif 984 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 985 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 986 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 987 nv+cum,&pcbddc->adaptive_constraints_idxs, 988 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 989 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 990 991 maxneigs = 0; 992 cum = cum2 = cumarray = 0; 993 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 994 pcbddc->adaptive_constraints_data_ptr[0] = 0; 995 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 996 const PetscInt *idxs; 997 998 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 999 for (cum=0;cum<nv;cum++) { 1000 pcbddc->adaptive_constraints_n[cum] = 1; 1001 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1002 pcbddc->adaptive_constraints_data[cum] = 1.0; 1003 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1004 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1005 } 1006 cum2 = cum; 1007 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1008 } 1009 1010 if (mss) { /* multilevel */ 1011 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1012 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1013 } 1014 1015 thresh = pcbddc->adaptive_threshold; 1016 for (i=0;i<sub_schurs->n_subs;i++) { 1017 const PetscInt *idxs; 1018 PetscReal infty = PETSC_MAX_REAL; 1019 PetscInt j,subset_size,eigs_start = 0; 1020 PetscBLASInt B_N; 1021 PetscBool same_data = PETSC_FALSE; 1022 1023 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1024 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1025 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1026 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1027 if (sub_schurs->is_hermitian) { 1028 PetscInt j,k; 1029 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1030 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1031 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1032 } 1033 for (j=0;j<subset_size;j++) { 1034 for (k=j;k<subset_size;k++) { 1035 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1036 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1037 } 1038 } 1039 } else { 1040 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1041 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 } else { 1044 S = Sarray + cumarray; 1045 St = Starray + cumarray; 1046 } 1047 1048 /* see if we can save some work */ 1049 if (sub_schurs->n_subs == 1) { 1050 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1051 } 1052 1053 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1054 B_neigs = 0; 1055 } else { 1056 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1057 PetscBLASInt B_itype = 1; 1058 PetscBLASInt B_IL, B_IU; 1059 PetscReal eps = -1.0; /* dlamch? */ 1060 PetscInt nmin_s; 1061 1062 if (pcbddc->dbg_flag) { 1063 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1064 } 1065 1066 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1067 if (thresh > 1.+PETSC_SMALL) { 1068 1069 /* ask for eigenvalues larger than thresh */ 1070 #if defined(PETSC_USE_COMPLEX) 1071 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1072 #else 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1074 #endif 1075 } else { 1076 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1077 B_IL = 1; 1078 #if defined(PETSC_USE_COMPLEX) 1079 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1080 #else 1081 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1082 #endif 1083 } 1084 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1085 if (B_ierr) { 1086 if (B_ierr < 0 ) { 1087 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1088 } else if (B_ierr <= B_N) { 1089 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1090 } else { 1091 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1092 } 1093 } 1094 1095 if (B_neigs > nmax) { 1096 if (pcbddc->dbg_flag) { 1097 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1098 } 1099 eigs_start = B_neigs -nmax; 1100 B_neigs = nmax; 1101 } 1102 1103 nmin_s = PetscMin(nmin,B_N); 1104 if (B_neigs < nmin_s) { 1105 PetscBLASInt B_neigs2; 1106 1107 B_IU = B_N - B_neigs; 1108 B_IL = B_N - nmin_s + 1; 1109 if (pcbddc->dbg_flag) { 1110 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1111 } 1112 if (sub_schurs->is_hermitian) { 1113 PetscInt j; 1114 for (j=0;j<subset_size;j++) { 1115 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1116 } 1117 for (j=0;j<subset_size;j++) { 1118 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1119 } 1120 } else { 1121 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1122 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1123 } 1124 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1125 #if defined(PETSC_USE_COMPLEX) 1126 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1127 #else 1128 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1129 #endif 1130 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1131 B_neigs += B_neigs2; 1132 } 1133 if (B_ierr) { 1134 if (B_ierr < 0 ) { 1135 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1136 } else if (B_ierr <= B_N) { 1137 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1138 } else { 1139 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1140 } 1141 } 1142 if (pcbddc->dbg_flag) { 1143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1144 for (j=0;j<B_neigs;j++) { 1145 if (eigs[j] == 0.0) { 1146 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1147 } else { 1148 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1149 } 1150 } 1151 } 1152 } else { 1153 /* TODO */ 1154 } 1155 } 1156 maxneigs = PetscMax(B_neigs,maxneigs); 1157 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1158 if (B_neigs) { 1159 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1160 1161 if (pcbddc->dbg_flag > 1) { 1162 PetscInt ii; 1163 for (ii=0;ii<B_neigs;ii++) { 1164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1165 for (j=0;j<B_N;j++) { 1166 #if defined(PETSC_USE_COMPLEX) 1167 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1168 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1170 #else 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1172 #endif 1173 } 1174 } 1175 } 1176 #if 0 1177 for (j=0;j<B_neigs;j++) { 1178 PetscBLASInt Blas_N,Blas_one = 1.0; 1179 PetscScalar norm; 1180 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1181 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1182 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1184 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1185 } else { 1186 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1187 } 1188 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1189 } 1190 #endif 1191 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1192 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1193 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1194 cum++; 1195 } 1196 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1197 /* shift for next computation */ 1198 cumarray += subset_size*subset_size; 1199 } 1200 if (pcbddc->dbg_flag) { 1201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1202 } 1203 1204 if (mss) { 1205 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1206 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1207 /* destroy matrices (junk) */ 1208 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1209 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1210 } 1211 if (allocated_S_St) { 1212 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1213 } 1214 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1215 #if defined(PETSC_USE_COMPLEX) 1216 ierr = PetscFree(rwork);CHKERRQ(ierr); 1217 #endif 1218 if (pcbddc->dbg_flag) { 1219 PetscInt maxneigs_r; 1220 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1222 } 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "PCBDDCSetUpSolvers" 1228 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1229 { 1230 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1231 PetscScalar *coarse_submat_vals; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1236 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1237 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1238 1239 /* Setup local neumann solver ksp_R */ 1240 /* PCBDDCSetUpLocalScatters should be called first! */ 1241 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1242 1243 /* Change global null space passed in by the user if change of basis has been requested */ 1244 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1245 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1246 } 1247 1248 /* 1249 Setup local correction and local part of coarse basis. 1250 Gives back the dense local part of the coarse matrix in column major ordering 1251 */ 1252 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1253 1254 /* Compute total number of coarse nodes and setup coarse solver */ 1255 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1256 1257 /* free */ 1258 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "PCBDDCResetCustomization" 1264 PetscErrorCode PCBDDCResetCustomization(PC pc) 1265 { 1266 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1271 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1272 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1273 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1274 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1275 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1276 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1277 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1278 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1279 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1280 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1281 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "PCBDDCResetTopography" 1287 PetscErrorCode PCBDDCResetTopography(PC pc) 1288 { 1289 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1290 PetscInt i; 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1295 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1296 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1297 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1298 for (i=0;i<pcbddc->n_local_subs;i++) { 1299 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1300 } 1301 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1302 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "PCBDDCResetSolvers" 1308 PetscErrorCode PCBDDCResetSolvers(PC pc) 1309 { 1310 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1311 PetscErrorCode ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1315 if (pcbddc->coarse_phi_B) { 1316 PetscScalar *array; 1317 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1318 ierr = PetscFree(array);CHKERRQ(ierr); 1319 } 1320 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1322 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1323 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1324 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1325 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1326 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1327 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1328 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1329 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1330 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1331 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1333 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1334 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1335 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1336 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1338 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1339 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1340 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1341 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1342 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1343 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1344 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1345 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1346 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1347 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1348 if (pcbddc->benign_zerodiag_subs) { 1349 PetscInt i; 1350 for (i=0;i<pcbddc->benign_n;i++) { 1351 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1352 } 1353 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1354 } 1355 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1361 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1362 { 1363 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1364 PC_IS *pcis = (PC_IS*)pc->data; 1365 VecType impVecType; 1366 PetscInt n_constraints,n_R,old_size; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 if (!pcbddc->ConstraintMatrix) { 1371 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1372 } 1373 /* get sizes */ 1374 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1375 n_R = pcis->n - pcbddc->n_vertices; 1376 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1377 /* local work vectors (try to avoid unneeded work)*/ 1378 /* R nodes */ 1379 old_size = -1; 1380 if (pcbddc->vec1_R) { 1381 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1382 } 1383 if (n_R != old_size) { 1384 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1385 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1386 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1387 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1388 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1389 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1390 } 1391 /* local primal dofs */ 1392 old_size = -1; 1393 if (pcbddc->vec1_P) { 1394 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1395 } 1396 if (pcbddc->local_primal_size != old_size) { 1397 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1398 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1399 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1400 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1401 } 1402 /* local explicit constraints */ 1403 old_size = -1; 1404 if (pcbddc->vec1_C) { 1405 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1406 } 1407 if (n_constraints && n_constraints != old_size) { 1408 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1409 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1410 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1411 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "PCBDDCSetUpCorrection" 1418 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1419 { 1420 PetscErrorCode ierr; 1421 /* pointers to pcis and pcbddc */ 1422 PC_IS* pcis = (PC_IS*)pc->data; 1423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1424 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1425 /* submatrices of local problem */ 1426 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1427 /* submatrices of benign trick */ 1428 Mat B0_V = NULL; 1429 /* submatrices of local coarse problem */ 1430 Mat S_VV,S_CV,S_VC,S_CC; 1431 /* working matrices */ 1432 Mat C_CR; 1433 /* additional working stuff */ 1434 PC pc_R; 1435 Mat F; 1436 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1437 1438 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1439 PetscScalar *work; 1440 PetscInt *idx_V_B; 1441 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1442 PetscInt i,n_R,n_D,n_B; 1443 1444 /* some shortcuts to scalars */ 1445 PetscScalar one=1.0,m_one=-1.0; 1446 1447 PetscFunctionBegin; 1448 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1449 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1450 } 1451 1452 /* Set Non-overlapping dimensions */ 1453 n_vertices = pcbddc->n_vertices; 1454 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1455 n_B = pcis->n_B; 1456 n_D = pcis->n - n_B; 1457 n_R = pcis->n - n_vertices; 1458 1459 /* vertices in boundary numbering */ 1460 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1461 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1462 if (i != n_vertices) { 1463 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1464 } 1465 1466 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1467 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1471 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1472 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1473 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1474 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1475 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1476 1477 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1478 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1479 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1480 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1481 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1482 lda_rhs = n_R; 1483 need_benign_correction = PETSC_FALSE; 1484 if (isLU || isILU || isCHOL) { 1485 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1486 } else if (sub_schurs->reuse_solver) { 1487 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1488 MatFactorType type; 1489 1490 F = reuse_solver->F; 1491 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1492 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1493 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1494 need_benign_correction = !!reuse_solver->benign_n; 1495 } else { 1496 F = NULL; 1497 } 1498 1499 /* allocate workspace */ 1500 n = 0; 1501 if (n_constraints) { 1502 n += lda_rhs*n_constraints; 1503 } 1504 if (n_vertices) { 1505 n = PetscMax(2*lda_rhs*n_vertices,n); 1506 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1507 } 1508 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1509 1510 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1511 if (n_constraints) { 1512 Mat M1,M2,M3; 1513 Mat auxmat; 1514 IS is_aux; 1515 PetscScalar *array,*array2; 1516 1517 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1518 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1519 1520 /* Extract constraints on R nodes: C_{CR} */ 1521 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1522 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1523 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1524 1525 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1526 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1527 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1528 for (i=0;i<n_constraints;i++) { 1529 const PetscScalar *row_cmat_values; 1530 const PetscInt *row_cmat_indices; 1531 PetscInt size_of_constraint,j; 1532 1533 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1534 for (j=0;j<size_of_constraint;j++) { 1535 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1536 } 1537 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1538 } 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1540 if (F) { 1541 Mat B; 1542 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1544 if (need_benign_correction) { 1545 PetscScalar *marr; 1546 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1547 1548 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1549 for (i=0;i<n_constraints;i++) { 1550 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1551 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1552 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1553 } 1554 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1555 } 1556 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1557 if (need_benign_correction) { 1558 PetscScalar *marr; 1559 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1560 1561 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 for (i=0;i<n_constraints;i++) { 1563 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1564 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1565 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1566 } 1567 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 } 1569 ierr = MatDestroy(&B);CHKERRQ(ierr); 1570 } else { 1571 PetscScalar *marr; 1572 1573 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1574 for (i=0;i<n_constraints;i++) { 1575 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1576 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1577 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1578 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1579 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1580 } 1581 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1582 } 1583 if (!pcbddc->switch_static) { 1584 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1585 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1586 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1587 for (i=0;i<n_constraints;i++) { 1588 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1589 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1590 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1593 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1594 } 1595 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1596 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1597 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1598 } else { 1599 if (lda_rhs != n_R) { 1600 IS dummy; 1601 1602 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1604 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1605 } else { 1606 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1607 pcbddc->local_auxmat2 = local_auxmat2_R; 1608 } 1609 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1610 } 1611 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1612 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1613 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1614 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1615 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1616 if (isCHOL) { 1617 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1618 } else { 1619 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1620 } 1621 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1622 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1623 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1624 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1625 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1626 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1627 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1628 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1629 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1630 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1631 } 1632 /* Get submatrices from subdomain matrix */ 1633 if (pcbddc->benign_n) { 1634 IS dummy; 1635 Mat B0_R; 1636 PetscReal norm; 1637 1638 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1639 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1640 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1641 if (norm > PETSC_SMALL) { 1642 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1643 } 1644 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1645 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1646 } 1647 1648 if (n_vertices) { 1649 IS is_aux; 1650 1651 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1652 IS tis; 1653 1654 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1655 ierr = ISSort(tis);CHKERRQ(ierr); 1656 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1657 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1658 } else { 1659 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1660 } 1661 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1663 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1664 if (pcbddc->benign_n) { 1665 IS dummy; 1666 1667 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1668 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1669 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1670 } 1671 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1672 } 1673 1674 /* Matrix of coarse basis functions (local) */ 1675 if (pcbddc->coarse_phi_B) { 1676 PetscInt on_B,on_primal,on_D=n_D; 1677 if (pcbddc->coarse_phi_D) { 1678 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1679 } 1680 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1681 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1682 PetscScalar *marray; 1683 1684 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1685 ierr = PetscFree(marray);CHKERRQ(ierr); 1686 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1687 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1688 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1689 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1690 } 1691 } 1692 1693 if (!pcbddc->coarse_phi_B) { 1694 PetscScalar *marray; 1695 1696 n = n_B*pcbddc->local_primal_size; 1697 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1698 n += n_D*pcbddc->local_primal_size; 1699 } 1700 if (!pcbddc->symmetric_primal) { 1701 n *= 2; 1702 } 1703 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1704 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1705 n = n_B*pcbddc->local_primal_size; 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1708 n += n_D*pcbddc->local_primal_size; 1709 } 1710 if (!pcbddc->symmetric_primal) { 1711 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1712 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1713 n = n_B*pcbddc->local_primal_size; 1714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1715 } 1716 } else { 1717 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1718 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1719 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1720 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1721 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1722 } 1723 } 1724 } 1725 1726 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1727 p0_lidx_I = NULL; 1728 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1729 const PetscInt *idxs; 1730 1731 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1732 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1733 for (i=0;i<pcbddc->benign_n;i++) { 1734 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1735 } 1736 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1737 } 1738 1739 /* vertices */ 1740 if (n_vertices) { 1741 1742 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1743 1744 if (n_R) { 1745 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1746 PetscBLASInt B_N,B_one = 1; 1747 PetscScalar *x,*y; 1748 PetscBool isseqaij; 1749 1750 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1751 if (need_benign_correction) { 1752 ISLocalToGlobalMapping RtoN; 1753 IS is_p0; 1754 PetscInt *idxs_p0,n; 1755 1756 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1757 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1758 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1759 if (n != pcbddc->benign_n) { 1760 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1761 } 1762 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1763 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1764 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1765 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1766 } 1767 1768 if (lda_rhs == n_R) { 1769 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1770 } else { 1771 PetscScalar *av,*array; 1772 const PetscInt *xadj,*adjncy; 1773 PetscInt n; 1774 PetscBool flg_row; 1775 1776 array = work+lda_rhs*n_vertices; 1777 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1778 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1779 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1781 for (i=0;i<n;i++) { 1782 PetscInt j; 1783 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1784 } 1785 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1786 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1787 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1788 } 1789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1790 if (F) { 1791 if (need_benign_correction) { 1792 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1793 PetscScalar *marr; 1794 1795 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1796 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1797 1798 | 0 0 0 | (V) 1799 L = | 0 0 -1 | (P-p0) 1800 | 0 0 -1 | (p0) 1801 1802 */ 1803 for (i=0;i<reuse_solver->benign_n;i++) { 1804 const PetscScalar *vals; 1805 const PetscInt *idxs,*idxs_zero; 1806 PetscInt n,j,nz; 1807 1808 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1809 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1810 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 for (j=0;j<n;j++) { 1812 PetscScalar val = vals[j]; 1813 PetscInt k,col = idxs[j]; 1814 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1815 } 1816 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1817 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1818 } 1819 /* need to correct the rhs */ 1820 for (i=0;i<n_vertices;i++) { 1821 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1822 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1823 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1824 } 1825 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1826 } 1827 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1828 /* need to correct the solution */ 1829 if (need_benign_correction) { 1830 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1831 PetscScalar *marr; 1832 1833 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 for (i=0;i<n_vertices;i++) { 1835 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1836 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1838 } 1839 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1840 } 1841 } else { 1842 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1843 for (i=0;i<n_vertices;i++) { 1844 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1845 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1846 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1848 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1849 } 1850 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1851 } 1852 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1853 /* S_VV and S_CV */ 1854 if (n_constraints) { 1855 Mat B; 1856 1857 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1858 for (i=0;i<n_vertices;i++) { 1859 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1860 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1864 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1865 } 1866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1867 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1870 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1871 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1872 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1873 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1874 ierr = MatDestroy(&B);CHKERRQ(ierr); 1875 } 1876 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1877 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1878 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1879 } 1880 if (lda_rhs != n_R) { 1881 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1883 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1884 } 1885 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1886 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1887 if (need_benign_correction) { 1888 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1889 PetscScalar *marr,*sums; 1890 1891 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1892 ierr = MatDenseGetArray(S_VVt,&marr); 1893 for (i=0;i<reuse_solver->benign_n;i++) { 1894 const PetscScalar *vals; 1895 const PetscInt *idxs,*idxs_zero; 1896 PetscInt n,j,nz; 1897 1898 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1899 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1900 for (j=0;j<n_vertices;j++) { 1901 PetscInt k; 1902 sums[j] = 0.; 1903 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1904 } 1905 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1906 for (j=0;j<n;j++) { 1907 PetscScalar val = vals[j]; 1908 PetscInt k; 1909 for (k=0;k<n_vertices;k++) { 1910 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1911 } 1912 } 1913 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1914 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1915 } 1916 ierr = PetscFree(sums);CHKERRQ(ierr); 1917 ierr = MatDenseRestoreArray(S_VVt,&marr); 1918 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1919 } 1920 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1921 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1922 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1923 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1924 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1925 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1926 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1927 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1928 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1929 } else { 1930 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1931 } 1932 for (i=0;i<pcbddc->benign_n;i++) { 1933 const PetscScalar *vals; 1934 const PetscInt *idxs; 1935 PetscInt n,j,primal_idx; 1936 1937 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1939 for (j=0;j<n;j++) { 1940 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1941 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1942 } 1943 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1944 } 1945 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1946 1947 /* coarse basis functions */ 1948 for (i=0;i<n_vertices;i++) { 1949 PetscScalar *y; 1950 1951 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1952 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1953 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1954 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1955 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 y[n_B*i+idx_V_B[i]] = 1.0; 1957 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1958 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1959 1960 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1961 PetscInt j; 1962 1963 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1965 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1966 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1967 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1968 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1969 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1970 } 1971 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1972 } 1973 /* if n_R == 0 the object is not destroyed */ 1974 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1975 } 1976 1977 if (n_constraints) { 1978 Mat B; 1979 1980 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1981 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1982 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1983 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1984 if (n_vertices) { 1985 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1986 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1987 } else { 1988 Mat S_VCt; 1989 1990 if (lda_rhs != n_R) { 1991 ierr = MatDestroy(&B);CHKERRQ(ierr); 1992 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1993 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1994 } 1995 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1996 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1997 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1998 } 1999 } 2000 ierr = MatDestroy(&B);CHKERRQ(ierr); 2001 /* coarse basis functions */ 2002 for (i=0;i<n_constraints;i++) { 2003 PetscScalar *y; 2004 2005 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2006 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2007 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2008 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2009 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2011 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2012 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2013 PetscInt j; 2014 2015 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2017 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2019 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2020 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2021 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2022 } 2023 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2024 } 2025 } 2026 if (n_constraints) { 2027 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2028 } 2029 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2030 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2031 2032 /* compute other basis functions for non-symmetric problems */ 2033 if (!pcbddc->symmetric_primal) { 2034 Mat B_V=NULL,B_C=NULL; 2035 PetscScalar *marray; 2036 2037 if (n_constraints) { 2038 Mat S_CCT,C_CRT; 2039 2040 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2041 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2042 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2043 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2044 if (n_vertices) { 2045 Mat S_VCT; 2046 2047 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2048 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2049 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2050 } 2051 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2052 } 2053 if (n_vertices && n_R) { 2054 PetscScalar *av,*marray; 2055 const PetscInt *xadj,*adjncy; 2056 PetscInt n; 2057 PetscBool flg_row; 2058 2059 /* B_V = B_V - A_VR^T */ 2060 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2061 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2062 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2063 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2064 for (i=0;i<n;i++) { 2065 PetscInt j; 2066 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2067 } 2068 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2069 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2070 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2071 } 2072 2073 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2074 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2075 for (i=0;i<n_vertices;i++) { 2076 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2077 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2078 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2079 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2080 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2081 } 2082 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2083 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2084 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2085 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2086 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2087 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2088 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2089 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2090 } 2091 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2092 /* coarse basis functions */ 2093 for (i=0;i<pcbddc->local_primal_size;i++) { 2094 PetscScalar *y; 2095 2096 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2097 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2098 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2099 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 if (i<n_vertices) { 2102 y[n_B*i+idx_V_B[i]] = 1.0; 2103 } 2104 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2105 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2106 2107 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2108 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2109 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2110 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2113 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2114 } 2115 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2116 } 2117 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2118 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2119 } 2120 /* free memory */ 2121 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2122 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2123 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2124 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2125 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2126 ierr = PetscFree(work);CHKERRQ(ierr); 2127 if (n_vertices) { 2128 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2129 } 2130 if (n_constraints) { 2131 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2132 } 2133 /* Checking coarse_sub_mat and coarse basis functios */ 2134 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2135 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2136 if (pcbddc->dbg_flag) { 2137 Mat coarse_sub_mat; 2138 Mat AUXMAT,TM1,TM2,TM3,TM4; 2139 Mat coarse_phi_D,coarse_phi_B; 2140 Mat coarse_psi_D,coarse_psi_B; 2141 Mat A_II,A_BB,A_IB,A_BI; 2142 Mat C_B,CPHI; 2143 IS is_dummy; 2144 Vec mones; 2145 MatType checkmattype=MATSEQAIJ; 2146 PetscReal real_value; 2147 2148 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2149 Mat A; 2150 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2151 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2152 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2153 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2154 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2155 ierr = MatDestroy(&A);CHKERRQ(ierr); 2156 } else { 2157 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2158 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2159 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2160 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2161 } 2162 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2163 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2164 if (!pcbddc->symmetric_primal) { 2165 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2166 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2167 } 2168 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2169 2170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2173 if (!pcbddc->symmetric_primal) { 2174 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2178 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2180 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2181 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2182 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2183 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2184 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2185 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2188 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2189 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2190 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2191 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2192 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2193 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2194 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2195 } 2196 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2197 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2198 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2199 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2200 if (pcbddc->benign_n) { 2201 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2202 PetscScalar *data,*data2; 2203 PetscInt j; 2204 2205 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2206 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2207 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2208 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2209 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2210 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2211 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2212 for (j=0;j<pcbddc->benign_n;j++) { 2213 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2214 for (i=0;i<pcbddc->local_primal_size;i++) { 2215 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2216 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2217 } 2218 } 2219 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2220 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2221 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2222 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2223 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2224 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2225 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2226 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2227 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2228 } 2229 #if 0 2230 { 2231 PetscViewer viewer; 2232 char filename[256]; 2233 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2234 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2235 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2236 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2237 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2238 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2239 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2240 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2241 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2242 if (pcbddc->coarse_phi_D) { 2243 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2244 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2245 } 2246 if (pcbddc->coarse_psi_B) { 2247 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2248 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2249 } 2250 if (pcbddc->coarse_phi_B) { 2251 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2252 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2253 } 2254 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2255 } 2256 #endif 2257 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2258 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2261 2262 /* check constraints */ 2263 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2264 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2265 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2266 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2267 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2268 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2269 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2270 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2272 if (!pcbddc->symmetric_primal) { 2273 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2274 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2275 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2276 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2278 } 2279 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2280 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2281 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2282 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2283 } 2284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2285 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2286 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2287 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2288 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2289 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2290 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2291 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2292 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2293 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2294 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2295 if (!pcbddc->symmetric_primal) { 2296 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2297 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2298 } 2299 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2300 } 2301 /* get back data */ 2302 *coarse_submat_vals_n = coarse_submat_vals; 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2308 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2309 { 2310 Mat *work_mat; 2311 IS isrow_s,iscol_s; 2312 PetscBool rsorted,csorted; 2313 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2318 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2319 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2320 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2321 2322 if (!rsorted) { 2323 const PetscInt *idxs; 2324 PetscInt *idxs_sorted,i; 2325 2326 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2327 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2328 for (i=0;i<rsize;i++) { 2329 idxs_perm_r[i] = i; 2330 } 2331 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2332 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2333 for (i=0;i<rsize;i++) { 2334 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2335 } 2336 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2337 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2338 } else { 2339 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2340 isrow_s = isrow; 2341 } 2342 2343 if (!csorted) { 2344 if (isrow == iscol) { 2345 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2346 iscol_s = isrow_s; 2347 } else { 2348 const PetscInt *idxs; 2349 PetscInt *idxs_sorted,i; 2350 2351 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2352 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2353 for (i=0;i<csize;i++) { 2354 idxs_perm_c[i] = i; 2355 } 2356 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2357 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2358 for (i=0;i<csize;i++) { 2359 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2360 } 2361 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2362 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2363 } 2364 } else { 2365 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2366 iscol_s = iscol; 2367 } 2368 2369 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2370 2371 if (!rsorted || !csorted) { 2372 Mat new_mat; 2373 IS is_perm_r,is_perm_c; 2374 2375 if (!rsorted) { 2376 PetscInt *idxs_r,i; 2377 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2378 for (i=0;i<rsize;i++) { 2379 idxs_r[idxs_perm_r[i]] = i; 2380 } 2381 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2382 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2383 } else { 2384 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2385 } 2386 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2387 2388 if (!csorted) { 2389 if (isrow_s == iscol_s) { 2390 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2391 is_perm_c = is_perm_r; 2392 } else { 2393 PetscInt *idxs_c,i; 2394 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2395 for (i=0;i<csize;i++) { 2396 idxs_c[idxs_perm_c[i]] = i; 2397 } 2398 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2399 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2400 } 2401 } else { 2402 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2403 } 2404 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2405 2406 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2407 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2408 work_mat[0] = new_mat; 2409 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2410 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2411 } 2412 2413 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2414 *B = work_mat[0]; 2415 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2416 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2417 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2423 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2424 { 2425 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2426 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2427 Mat new_mat; 2428 IS is_local,is_global; 2429 PetscInt local_size; 2430 PetscBool isseqaij; 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2435 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2436 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2437 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2438 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2439 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2440 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2441 2442 /* check */ 2443 if (pcbddc->dbg_flag) { 2444 Vec x,x_change; 2445 PetscReal error; 2446 2447 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2448 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2449 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2450 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2451 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2452 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2453 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2454 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2455 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2456 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2457 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2459 ierr = VecDestroy(&x);CHKERRQ(ierr); 2460 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2461 } 2462 2463 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2464 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2465 if (isseqaij) { 2466 Mat M; 2467 2468 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2469 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2470 ierr = MatDestroy(&M);CHKERRQ(ierr); 2471 } else { 2472 Mat work_mat,M; 2473 2474 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2475 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2476 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2477 ierr = MatDestroy(&M);CHKERRQ(ierr); 2478 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2479 } 2480 if (matis->A->symmetric_set) { 2481 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2482 #if !defined(PETSC_USE_COMPLEX) 2483 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2484 #endif 2485 } 2486 /* 2487 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2488 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2489 */ 2490 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 #undef __FUNCT__ 2495 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2496 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2497 { 2498 PC_IS* pcis = (PC_IS*)(pc->data); 2499 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2500 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2501 PetscInt *idx_R_local=NULL; 2502 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2503 PetscInt vbs,bs; 2504 PetscBT bitmask=NULL; 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 /* 2509 No need to setup local scatters if 2510 - primal space is unchanged 2511 AND 2512 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2513 AND 2514 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2515 */ 2516 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2517 PetscFunctionReturn(0); 2518 } 2519 /* destroy old objects */ 2520 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2521 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2522 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2523 /* Set Non-overlapping dimensions */ 2524 n_B = pcis->n_B; 2525 n_D = pcis->n - n_B; 2526 n_vertices = pcbddc->n_vertices; 2527 2528 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2529 2530 /* create auxiliary bitmask and allocate workspace */ 2531 if (!sub_schurs->reuse_solver) { 2532 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2533 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2534 for (i=0;i<n_vertices;i++) { 2535 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2536 } 2537 2538 for (i=0, n_R=0; i<pcis->n; i++) { 2539 if (!PetscBTLookup(bitmask,i)) { 2540 idx_R_local[n_R++] = i; 2541 } 2542 } 2543 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2544 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2545 2546 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2547 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2548 } 2549 2550 /* Block code */ 2551 vbs = 1; 2552 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2553 if (bs>1 && !(n_vertices%bs)) { 2554 PetscBool is_blocked = PETSC_TRUE; 2555 PetscInt *vary; 2556 if (!sub_schurs->reuse_solver) { 2557 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2558 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2559 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2560 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2561 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2562 for (i=0; i<pcis->n/bs; i++) { 2563 if (vary[i]!=0 && vary[i]!=bs) { 2564 is_blocked = PETSC_FALSE; 2565 break; 2566 } 2567 } 2568 ierr = PetscFree(vary);CHKERRQ(ierr); 2569 } else { 2570 /* Verify directly the R set */ 2571 for (i=0; i<n_R/bs; i++) { 2572 PetscInt j,node=idx_R_local[bs*i]; 2573 for (j=1; j<bs; j++) { 2574 if (node != idx_R_local[bs*i+j]-j) { 2575 is_blocked = PETSC_FALSE; 2576 break; 2577 } 2578 } 2579 } 2580 } 2581 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2582 vbs = bs; 2583 for (i=0;i<n_R/vbs;i++) { 2584 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2585 } 2586 } 2587 } 2588 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2589 if (sub_schurs->reuse_solver) { 2590 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2591 2592 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2593 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2594 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2595 reuse_solver->is_R = pcbddc->is_R_local; 2596 } else { 2597 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2598 } 2599 2600 /* print some info if requested */ 2601 if (pcbddc->dbg_flag) { 2602 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2603 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2604 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2608 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2609 } 2610 2611 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2612 if (!sub_schurs->reuse_solver) { 2613 IS is_aux1,is_aux2; 2614 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2615 2616 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2617 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2618 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2619 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2620 for (i=0; i<n_D; i++) { 2621 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2622 } 2623 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2624 for (i=0, j=0; i<n_R; i++) { 2625 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2626 aux_array1[j++] = i; 2627 } 2628 } 2629 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2630 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2631 for (i=0, j=0; i<n_B; i++) { 2632 if (!PetscBTLookup(bitmask,is_indices[i])) { 2633 aux_array2[j++] = i; 2634 } 2635 } 2636 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2638 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2639 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2640 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2641 2642 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2643 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2644 for (i=0, j=0; i<n_R; i++) { 2645 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2646 aux_array1[j++] = i; 2647 } 2648 } 2649 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2650 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2651 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2652 } 2653 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2654 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2655 } else { 2656 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2657 IS tis; 2658 PetscInt schur_size; 2659 2660 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2661 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2662 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2663 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2664 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2665 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2666 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2667 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2668 } 2669 } 2670 PetscFunctionReturn(0); 2671 } 2672 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2676 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2677 { 2678 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2679 PC_IS *pcis = (PC_IS*)pc->data; 2680 PC pc_temp; 2681 Mat A_RR; 2682 MatReuse reuse; 2683 PetscScalar m_one = -1.0; 2684 PetscReal value; 2685 PetscInt n_D,n_R; 2686 PetscBool use_exact,use_exact_reduced,issbaij; 2687 PetscErrorCode ierr; 2688 /* prefixes stuff */ 2689 char dir_prefix[256],neu_prefix[256],str_level[16]; 2690 size_t len; 2691 2692 PetscFunctionBegin; 2693 2694 /* compute prefixes */ 2695 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2696 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2697 if (!pcbddc->current_level) { 2698 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2699 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2700 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2701 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2702 } else { 2703 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2704 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2705 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2706 len -= 15; /* remove "pc_bddc_coarse_" */ 2707 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2708 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2709 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2710 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2711 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2712 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2713 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2714 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2715 } 2716 2717 /* DIRICHLET PROBLEM */ 2718 if (dirichlet) { 2719 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2720 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2721 Mat A_IIn; 2722 2723 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2724 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2725 pcis->A_II = A_IIn; 2726 } 2727 if (pcbddc->local_mat->symmetric_set) { 2728 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2729 } 2730 /* Matrix for Dirichlet problem is pcis->A_II */ 2731 n_D = pcis->n - pcis->n_B; 2732 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2733 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2734 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2735 /* default */ 2736 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2737 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2738 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2739 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2740 if (issbaij) { 2741 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2742 } else { 2743 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2744 } 2745 /* Allow user's customization */ 2746 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2747 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2748 } 2749 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2750 if (sub_schurs->reuse_solver) { 2751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2752 2753 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2754 } 2755 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2756 if (!n_D) { 2757 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2758 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2759 } 2760 /* Set Up KSP for Dirichlet problem of BDDC */ 2761 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2762 /* set ksp_D into pcis data */ 2763 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2764 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2765 pcis->ksp_D = pcbddc->ksp_D; 2766 } 2767 2768 /* NEUMANN PROBLEM */ 2769 A_RR = 0; 2770 if (neumann) { 2771 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2772 PetscInt ibs,mbs; 2773 PetscBool issbaij; 2774 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2775 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2776 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2777 if (pcbddc->ksp_R) { /* already created ksp */ 2778 PetscInt nn_R; 2779 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2780 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2781 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2782 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2783 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2784 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2785 reuse = MAT_INITIAL_MATRIX; 2786 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2787 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2788 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2789 reuse = MAT_INITIAL_MATRIX; 2790 } else { /* safe to reuse the matrix */ 2791 reuse = MAT_REUSE_MATRIX; 2792 } 2793 } 2794 /* last check */ 2795 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2796 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2797 reuse = MAT_INITIAL_MATRIX; 2798 } 2799 } else { /* first time, so we need to create the matrix */ 2800 reuse = MAT_INITIAL_MATRIX; 2801 } 2802 /* extract A_RR */ 2803 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2804 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2805 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2806 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2807 if (matis->A == pcbddc->local_mat) { 2808 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2809 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2810 } else { 2811 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2812 } 2813 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2814 if (matis->A == pcbddc->local_mat) { 2815 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2816 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2817 } else { 2818 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2819 } 2820 } 2821 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2822 if (pcbddc->local_mat->symmetric_set) { 2823 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2824 } 2825 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2826 Mat A_RRn; 2827 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2828 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2829 A_RR = A_RRn; 2830 } 2831 if (!pcbddc->ksp_R) { /* create object if not present */ 2832 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2833 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2834 /* default */ 2835 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2836 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2837 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2838 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2839 if (issbaij) { 2840 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2841 } else { 2842 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2843 } 2844 /* Allow user's customization */ 2845 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2846 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2847 } 2848 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2849 if (!n_R) { 2850 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2851 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2852 } 2853 /* Reuse solver if it is present */ 2854 if (sub_schurs->reuse_solver) { 2855 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2856 2857 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2858 } 2859 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2860 /* Set Up KSP for Neumann problem of BDDC */ 2861 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2862 } 2863 /* free Neumann problem's matrix */ 2864 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2865 2866 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2867 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2868 if (pcbddc->dbg_flag) { 2869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2870 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2871 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2872 } 2873 if (dirichlet) { /* Dirichlet */ 2874 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2875 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2876 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2877 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2878 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2879 /* need to be adapted? */ 2880 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2881 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2882 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2883 /* print info */ 2884 if (pcbddc->dbg_flag) { 2885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2886 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2887 } 2888 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2889 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2890 } 2891 } 2892 if (neumann) { /* Neumann */ 2893 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2894 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2895 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2896 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2897 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2898 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2899 /* need to be adapted? */ 2900 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2901 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2902 /* print info */ 2903 if (pcbddc->dbg_flag) { 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2906 } 2907 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2908 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2909 } 2910 } 2911 } 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2917 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2918 { 2919 PetscErrorCode ierr; 2920 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2921 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2922 2923 PetscFunctionBegin; 2924 if (!sub_schurs->reuse_solver) { 2925 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2926 } 2927 if (!pcbddc->switch_static) { 2928 if (applytranspose && pcbddc->local_auxmat1) { 2929 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2930 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2931 } 2932 if (!sub_schurs->reuse_solver) { 2933 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2935 } else { 2936 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2937 2938 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2940 } 2941 } else { 2942 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2943 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2946 if (applytranspose && pcbddc->local_auxmat1) { 2947 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2948 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2949 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2951 } 2952 } 2953 if (!sub_schurs->reuse_solver) { 2954 if (applytranspose) { 2955 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2956 } else { 2957 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2958 } 2959 } else { 2960 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2961 2962 if (applytranspose) { 2963 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2964 } else { 2965 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2966 } 2967 } 2968 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2969 if (!pcbddc->switch_static) { 2970 if (!sub_schurs->reuse_solver) { 2971 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2972 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2973 } else { 2974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2975 2976 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2977 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2978 } 2979 if (!applytranspose && pcbddc->local_auxmat1) { 2980 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2981 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2982 } 2983 } else { 2984 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2985 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2988 if (!applytranspose && pcbddc->local_auxmat1) { 2989 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2990 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2991 } 2992 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2993 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2996 } 2997 PetscFunctionReturn(0); 2998 } 2999 3000 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3003 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3004 { 3005 PetscErrorCode ierr; 3006 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3007 PC_IS* pcis = (PC_IS*) (pc->data); 3008 const PetscScalar zero = 0.0; 3009 3010 PetscFunctionBegin; 3011 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3012 if (!pcbddc->benign_apply_coarse_only) { 3013 if (applytranspose) { 3014 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3015 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3016 } else { 3017 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3018 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3019 } 3020 } else { 3021 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3022 } 3023 3024 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3025 if (pcbddc->benign_n) { 3026 PetscScalar *array; 3027 PetscInt j; 3028 3029 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3030 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3031 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3032 } 3033 3034 /* start communications from local primal nodes to rhs of coarse solver */ 3035 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3036 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3038 3039 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3040 /* TODO remove null space when doing multilevel */ 3041 if (pcbddc->coarse_ksp) { 3042 Mat coarse_mat; 3043 Vec rhs,sol; 3044 MatNullSpace nullsp; 3045 3046 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3047 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3048 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3049 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3050 if (nullsp) { 3051 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3052 } 3053 if (applytranspose) { 3054 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3055 } else { 3056 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3057 } 3058 if (nullsp) { 3059 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3060 } 3061 } 3062 3063 /* Local solution on R nodes */ 3064 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3065 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3066 } 3067 /* communications from coarse sol to local primal nodes */ 3068 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3070 3071 /* Sum contributions from the two levels */ 3072 if (!pcbddc->benign_apply_coarse_only) { 3073 if (applytranspose) { 3074 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3075 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3076 } else { 3077 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3078 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3079 } 3080 /* store p0 */ 3081 if (pcbddc->benign_n) { 3082 PetscScalar *array; 3083 PetscInt j; 3084 3085 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3086 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3087 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3088 } 3089 } else { /* expand the coarse solution */ 3090 if (applytranspose) { 3091 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3092 } else { 3093 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3094 } 3095 } 3096 PetscFunctionReturn(0); 3097 } 3098 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3101 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3102 { 3103 PetscErrorCode ierr; 3104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3105 PetscScalar *array; 3106 Vec from,to; 3107 3108 PetscFunctionBegin; 3109 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3110 from = pcbddc->coarse_vec; 3111 to = pcbddc->vec1_P; 3112 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3113 Vec tvec; 3114 3115 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3116 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3117 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3118 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3119 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3121 } 3122 } else { /* from local to global -> put data in coarse right hand side */ 3123 from = pcbddc->vec1_P; 3124 to = pcbddc->coarse_vec; 3125 } 3126 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 3130 #undef __FUNCT__ 3131 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3132 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3133 { 3134 PetscErrorCode ierr; 3135 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3136 PetscScalar *array; 3137 Vec from,to; 3138 3139 PetscFunctionBegin; 3140 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3141 from = pcbddc->coarse_vec; 3142 to = pcbddc->vec1_P; 3143 } else { /* from local to global -> put data in coarse right hand side */ 3144 from = pcbddc->vec1_P; 3145 to = pcbddc->coarse_vec; 3146 } 3147 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3148 if (smode == SCATTER_FORWARD) { 3149 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3150 Vec tvec; 3151 3152 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3153 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3154 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3155 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3156 } 3157 } else { 3158 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3159 ierr = VecResetArray(from);CHKERRQ(ierr); 3160 } 3161 } 3162 PetscFunctionReturn(0); 3163 } 3164 3165 /* uncomment for testing purposes */ 3166 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3169 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3170 { 3171 PetscErrorCode ierr; 3172 PC_IS* pcis = (PC_IS*)(pc->data); 3173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3174 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3175 /* one and zero */ 3176 PetscScalar one=1.0,zero=0.0; 3177 /* space to store constraints and their local indices */ 3178 PetscScalar *constraints_data; 3179 PetscInt *constraints_idxs,*constraints_idxs_B; 3180 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3181 PetscInt *constraints_n; 3182 /* iterators */ 3183 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3184 /* BLAS integers */ 3185 PetscBLASInt lwork,lierr; 3186 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3187 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3188 /* reuse */ 3189 PetscInt olocal_primal_size,olocal_primal_size_cc; 3190 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3191 /* change of basis */ 3192 PetscBool qr_needed; 3193 PetscBT change_basis,qr_needed_idx; 3194 /* auxiliary stuff */ 3195 PetscInt *nnz,*is_indices; 3196 PetscInt ncc; 3197 /* some quantities */ 3198 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3199 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3200 3201 PetscFunctionBegin; 3202 /* Destroy Mat objects computed previously */ 3203 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3204 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3205 /* save info on constraints from previous setup (if any) */ 3206 olocal_primal_size = pcbddc->local_primal_size; 3207 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3208 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3209 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3210 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3211 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3212 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3213 3214 /* print some info */ 3215 if (pcbddc->dbg_flag) { 3216 IS vertices; 3217 PetscInt nv,nedges,nfaces; 3218 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3219 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3220 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3221 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3228 } 3229 3230 if (!pcbddc->adaptive_selection) { 3231 IS ISForVertices,*ISForFaces,*ISForEdges; 3232 MatNullSpace nearnullsp; 3233 const Vec *nearnullvecs; 3234 Vec *localnearnullsp; 3235 PetscScalar *array; 3236 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3237 PetscBool nnsp_has_cnst; 3238 /* LAPACK working arrays for SVD or POD */ 3239 PetscBool skip_lapack,boolforchange; 3240 PetscScalar *work; 3241 PetscReal *singular_vals; 3242 #if defined(PETSC_USE_COMPLEX) 3243 PetscReal *rwork; 3244 #endif 3245 #if defined(PETSC_MISSING_LAPACK_GESVD) 3246 PetscScalar *temp_basis,*correlation_mat; 3247 #else 3248 PetscBLASInt dummy_int=1; 3249 PetscScalar dummy_scalar=1.; 3250 #endif 3251 3252 /* Get index sets for faces, edges and vertices from graph */ 3253 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3254 /* free unneeded index sets */ 3255 if (!pcbddc->use_vertices) { 3256 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3257 } 3258 if (!pcbddc->use_edges) { 3259 for (i=0;i<n_ISForEdges;i++) { 3260 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3261 } 3262 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3263 n_ISForEdges = 0; 3264 } 3265 if (!pcbddc->use_faces) { 3266 for (i=0;i<n_ISForFaces;i++) { 3267 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3268 } 3269 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3270 n_ISForFaces = 0; 3271 } 3272 3273 #if defined(PETSC_USE_DEBUG) 3274 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3275 Also use_change_of_basis should be consistent among processors */ 3276 if (pcbddc->NullSpace) { 3277 PetscBool tbool[2],gbool[2]; 3278 3279 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3280 pcbddc->use_change_of_basis = PETSC_TRUE; 3281 if (!ISForEdges) { 3282 pcbddc->use_change_on_faces = PETSC_TRUE; 3283 } 3284 } 3285 tbool[0] = pcbddc->use_change_of_basis; 3286 tbool[1] = pcbddc->use_change_on_faces; 3287 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3288 pcbddc->use_change_of_basis = gbool[0]; 3289 pcbddc->use_change_on_faces = gbool[1]; 3290 } 3291 #endif 3292 3293 /* check if near null space is attached to global mat */ 3294 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3295 if (nearnullsp) { 3296 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3297 /* remove any stored info */ 3298 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3299 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3300 /* store information for BDDC solver reuse */ 3301 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3302 pcbddc->onearnullspace = nearnullsp; 3303 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3304 for (i=0;i<nnsp_size;i++) { 3305 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3306 } 3307 } else { /* if near null space is not provided BDDC uses constants by default */ 3308 nnsp_size = 0; 3309 nnsp_has_cnst = PETSC_TRUE; 3310 } 3311 /* get max number of constraints on a single cc */ 3312 max_constraints = nnsp_size; 3313 if (nnsp_has_cnst) max_constraints++; 3314 3315 /* 3316 Evaluate maximum storage size needed by the procedure 3317 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3318 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3319 There can be multiple constraints per connected component 3320 */ 3321 n_vertices = 0; 3322 if (ISForVertices) { 3323 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3324 } 3325 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3326 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3327 3328 total_counts = n_ISForFaces+n_ISForEdges; 3329 total_counts *= max_constraints; 3330 total_counts += n_vertices; 3331 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3332 3333 total_counts = 0; 3334 max_size_of_constraint = 0; 3335 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3336 IS used_is; 3337 if (i<n_ISForEdges) { 3338 used_is = ISForEdges[i]; 3339 } else { 3340 used_is = ISForFaces[i-n_ISForEdges]; 3341 } 3342 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3343 total_counts += j; 3344 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3345 } 3346 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3347 3348 /* get local part of global near null space vectors */ 3349 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3350 for (k=0;k<nnsp_size;k++) { 3351 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3352 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3353 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3354 } 3355 3356 /* whether or not to skip lapack calls */ 3357 skip_lapack = PETSC_TRUE; 3358 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3359 3360 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3361 if (!skip_lapack) { 3362 PetscScalar temp_work; 3363 3364 #if defined(PETSC_MISSING_LAPACK_GESVD) 3365 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3366 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3368 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3369 #if defined(PETSC_USE_COMPLEX) 3370 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3371 #endif 3372 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3373 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3374 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3375 lwork = -1; 3376 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3377 #if !defined(PETSC_USE_COMPLEX) 3378 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3379 #else 3380 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3381 #endif 3382 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3383 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3384 #else /* on missing GESVD */ 3385 /* SVD */ 3386 PetscInt max_n,min_n; 3387 max_n = max_size_of_constraint; 3388 min_n = max_constraints; 3389 if (max_size_of_constraint < max_constraints) { 3390 min_n = max_size_of_constraint; 3391 max_n = max_constraints; 3392 } 3393 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3394 #if defined(PETSC_USE_COMPLEX) 3395 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3396 #endif 3397 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3398 lwork = -1; 3399 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3400 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3401 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3402 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3403 #if !defined(PETSC_USE_COMPLEX) 3404 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3405 #else 3406 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3407 #endif 3408 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3409 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3410 #endif /* on missing GESVD */ 3411 /* Allocate optimal workspace */ 3412 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3413 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3414 } 3415 /* Now we can loop on constraining sets */ 3416 total_counts = 0; 3417 constraints_idxs_ptr[0] = 0; 3418 constraints_data_ptr[0] = 0; 3419 /* vertices */ 3420 if (n_vertices) { 3421 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3422 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3423 for (i=0;i<n_vertices;i++) { 3424 constraints_n[total_counts] = 1; 3425 constraints_data[total_counts] = 1.0; 3426 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3427 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3428 total_counts++; 3429 } 3430 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3431 n_vertices = total_counts; 3432 } 3433 3434 /* edges and faces */ 3435 total_counts_cc = total_counts; 3436 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3437 IS used_is; 3438 PetscBool idxs_copied = PETSC_FALSE; 3439 3440 if (ncc<n_ISForEdges) { 3441 used_is = ISForEdges[ncc]; 3442 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3443 } else { 3444 used_is = ISForFaces[ncc-n_ISForEdges]; 3445 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3446 } 3447 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3448 3449 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3450 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3451 /* change of basis should not be performed on local periodic nodes */ 3452 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3453 if (nnsp_has_cnst) { 3454 PetscScalar quad_value; 3455 3456 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3457 idxs_copied = PETSC_TRUE; 3458 3459 if (!pcbddc->use_nnsp_true) { 3460 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3461 } else { 3462 quad_value = 1.0; 3463 } 3464 for (j=0;j<size_of_constraint;j++) { 3465 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3466 } 3467 temp_constraints++; 3468 total_counts++; 3469 } 3470 for (k=0;k<nnsp_size;k++) { 3471 PetscReal real_value; 3472 PetscScalar *ptr_to_data; 3473 3474 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3475 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3476 for (j=0;j<size_of_constraint;j++) { 3477 ptr_to_data[j] = array[is_indices[j]]; 3478 } 3479 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3480 /* check if array is null on the connected component */ 3481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3482 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3483 if (real_value > 0.0) { /* keep indices and values */ 3484 temp_constraints++; 3485 total_counts++; 3486 if (!idxs_copied) { 3487 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3488 idxs_copied = PETSC_TRUE; 3489 } 3490 } 3491 } 3492 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3493 valid_constraints = temp_constraints; 3494 if (!pcbddc->use_nnsp_true && temp_constraints) { 3495 if (temp_constraints == 1) { /* just normalize the constraint */ 3496 PetscScalar norm,*ptr_to_data; 3497 3498 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3500 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3501 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3502 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3503 } else { /* perform SVD */ 3504 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3505 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3506 3507 #if defined(PETSC_MISSING_LAPACK_GESVD) 3508 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3509 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3510 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3511 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3512 from that computed using LAPACKgesvd 3513 -> This is due to a different computation of eigenvectors in LAPACKheev 3514 -> The quality of the POD-computed basis will be the same */ 3515 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3516 /* Store upper triangular part of correlation matrix */ 3517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3518 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3519 for (j=0;j<temp_constraints;j++) { 3520 for (k=0;k<j+1;k++) { 3521 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3522 } 3523 } 3524 /* compute eigenvalues and eigenvectors of correlation matrix */ 3525 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3526 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3527 #if !defined(PETSC_USE_COMPLEX) 3528 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3529 #else 3530 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3531 #endif 3532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3533 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3534 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3535 j = 0; 3536 while (j < temp_constraints && singular_vals[j] < tol) j++; 3537 total_counts = total_counts-j; 3538 valid_constraints = temp_constraints-j; 3539 /* scale and copy POD basis into used quadrature memory */ 3540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3541 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3542 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3543 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3544 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3546 if (j<temp_constraints) { 3547 PetscInt ii; 3548 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3549 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3550 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3552 for (k=0;k<temp_constraints-j;k++) { 3553 for (ii=0;ii<size_of_constraint;ii++) { 3554 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3555 } 3556 } 3557 } 3558 #else /* on missing GESVD */ 3559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3560 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3562 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3563 #if !defined(PETSC_USE_COMPLEX) 3564 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3565 #else 3566 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3567 #endif 3568 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3569 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3570 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3571 k = temp_constraints; 3572 if (k > size_of_constraint) k = size_of_constraint; 3573 j = 0; 3574 while (j < k && singular_vals[k-j-1] < tol) j++; 3575 valid_constraints = k-j; 3576 total_counts = total_counts-temp_constraints+valid_constraints; 3577 #endif /* on missing GESVD */ 3578 } 3579 } 3580 /* update pointers information */ 3581 if (valid_constraints) { 3582 constraints_n[total_counts_cc] = valid_constraints; 3583 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3584 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3585 /* set change_of_basis flag */ 3586 if (boolforchange) { 3587 PetscBTSet(change_basis,total_counts_cc); 3588 } 3589 total_counts_cc++; 3590 } 3591 } 3592 /* free workspace */ 3593 if (!skip_lapack) { 3594 ierr = PetscFree(work);CHKERRQ(ierr); 3595 #if defined(PETSC_USE_COMPLEX) 3596 ierr = PetscFree(rwork);CHKERRQ(ierr); 3597 #endif 3598 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3599 #if defined(PETSC_MISSING_LAPACK_GESVD) 3600 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3601 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3602 #endif 3603 } 3604 for (k=0;k<nnsp_size;k++) { 3605 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3606 } 3607 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3608 /* free index sets of faces, edges and vertices */ 3609 for (i=0;i<n_ISForFaces;i++) { 3610 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3611 } 3612 if (n_ISForFaces) { 3613 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3614 } 3615 for (i=0;i<n_ISForEdges;i++) { 3616 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3617 } 3618 if (n_ISForEdges) { 3619 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3620 } 3621 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3622 } else { 3623 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3624 3625 total_counts = 0; 3626 n_vertices = 0; 3627 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3628 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3629 } 3630 max_constraints = 0; 3631 total_counts_cc = 0; 3632 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3633 total_counts += pcbddc->adaptive_constraints_n[i]; 3634 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3635 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3636 } 3637 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3638 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3639 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3640 constraints_data = pcbddc->adaptive_constraints_data; 3641 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3642 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3643 total_counts_cc = 0; 3644 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3645 if (pcbddc->adaptive_constraints_n[i]) { 3646 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3647 } 3648 } 3649 #if 0 3650 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3651 for (i=0;i<total_counts_cc;i++) { 3652 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3653 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3654 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3655 printf(" %d",constraints_idxs[j]); 3656 } 3657 printf("\n"); 3658 printf("number of cc: %d\n",constraints_n[i]); 3659 } 3660 for (i=0;i<n_vertices;i++) { 3661 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3662 } 3663 for (i=0;i<sub_schurs->n_subs;i++) { 3664 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3665 } 3666 #endif 3667 3668 max_size_of_constraint = 0; 3669 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3670 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3671 /* Change of basis */ 3672 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3673 if (pcbddc->use_change_of_basis) { 3674 for (i=0;i<sub_schurs->n_subs;i++) { 3675 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3676 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3677 } 3678 } 3679 } 3680 } 3681 pcbddc->local_primal_size = total_counts; 3682 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3683 3684 /* map constraints_idxs in boundary numbering */ 3685 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3686 if (i != constraints_idxs_ptr[total_counts_cc]) { 3687 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3688 } 3689 3690 /* Create constraint matrix */ 3691 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3692 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3693 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3694 3695 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3696 /* determine if a QR strategy is needed for change of basis */ 3697 qr_needed = PETSC_FALSE; 3698 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3699 total_primal_vertices=0; 3700 pcbddc->local_primal_size_cc = 0; 3701 for (i=0;i<total_counts_cc;i++) { 3702 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3703 if (size_of_constraint == 1) { 3704 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3705 pcbddc->local_primal_size_cc += 1; 3706 } else if (PetscBTLookup(change_basis,i)) { 3707 for (k=0;k<constraints_n[i];k++) { 3708 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3709 } 3710 pcbddc->local_primal_size_cc += constraints_n[i]; 3711 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3712 PetscBTSet(qr_needed_idx,i); 3713 qr_needed = PETSC_TRUE; 3714 } 3715 } else { 3716 pcbddc->local_primal_size_cc += 1; 3717 } 3718 } 3719 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3720 pcbddc->n_vertices = total_primal_vertices; 3721 /* permute indices in order to have a sorted set of vertices */ 3722 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3723 3724 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3725 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3726 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3727 3728 /* nonzero structure of constraint matrix */ 3729 /* and get reference dof for local constraints */ 3730 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3731 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3732 3733 j = total_primal_vertices; 3734 total_counts = total_primal_vertices; 3735 cum = total_primal_vertices; 3736 for (i=n_vertices;i<total_counts_cc;i++) { 3737 if (!PetscBTLookup(change_basis,i)) { 3738 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3739 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3740 cum++; 3741 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3742 for (k=0;k<constraints_n[i];k++) { 3743 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3744 nnz[j+k] = size_of_constraint; 3745 } 3746 j += constraints_n[i]; 3747 } 3748 } 3749 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3750 ierr = PetscFree(nnz);CHKERRQ(ierr); 3751 3752 /* set values in constraint matrix */ 3753 for (i=0;i<total_primal_vertices;i++) { 3754 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3755 } 3756 total_counts = total_primal_vertices; 3757 for (i=n_vertices;i<total_counts_cc;i++) { 3758 if (!PetscBTLookup(change_basis,i)) { 3759 PetscInt *cols; 3760 3761 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3762 cols = constraints_idxs+constraints_idxs_ptr[i]; 3763 for (k=0;k<constraints_n[i];k++) { 3764 PetscInt row = total_counts+k; 3765 PetscScalar *vals; 3766 3767 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3768 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3769 } 3770 total_counts += constraints_n[i]; 3771 } 3772 } 3773 /* assembling */ 3774 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 3777 /* 3778 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3779 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3780 */ 3781 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3782 if (pcbddc->use_change_of_basis) { 3783 /* dual and primal dofs on a single cc */ 3784 PetscInt dual_dofs,primal_dofs; 3785 /* working stuff for GEQRF */ 3786 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3787 PetscBLASInt lqr_work; 3788 /* working stuff for UNGQR */ 3789 PetscScalar *gqr_work,lgqr_work_t; 3790 PetscBLASInt lgqr_work; 3791 /* working stuff for TRTRS */ 3792 PetscScalar *trs_rhs; 3793 PetscBLASInt Blas_NRHS; 3794 /* pointers for values insertion into change of basis matrix */ 3795 PetscInt *start_rows,*start_cols; 3796 PetscScalar *start_vals; 3797 /* working stuff for values insertion */ 3798 PetscBT is_primal; 3799 PetscInt *aux_primal_numbering_B; 3800 /* matrix sizes */ 3801 PetscInt global_size,local_size; 3802 /* temporary change of basis */ 3803 Mat localChangeOfBasisMatrix; 3804 /* extra space for debugging */ 3805 PetscScalar *dbg_work; 3806 3807 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3808 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3809 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3810 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3811 /* nonzeros for local mat */ 3812 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3813 for (i=0;i<pcis->n;i++) nnz[i]=1; 3814 for (i=n_vertices;i<total_counts_cc;i++) { 3815 if (PetscBTLookup(change_basis,i)) { 3816 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3817 if (PetscBTLookup(qr_needed_idx,i)) { 3818 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3819 } else { 3820 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3821 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3822 } 3823 } 3824 } 3825 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3826 ierr = PetscFree(nnz);CHKERRQ(ierr); 3827 /* Set initial identity in the matrix */ 3828 for (i=0;i<pcis->n;i++) { 3829 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3830 } 3831 3832 if (pcbddc->dbg_flag) { 3833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3835 } 3836 3837 3838 /* Now we loop on the constraints which need a change of basis */ 3839 /* 3840 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3841 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3842 3843 Basic blocks of change of basis matrix T computed by 3844 3845 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3846 3847 | 1 0 ... 0 s_1/S | 3848 | 0 1 ... 0 s_2/S | 3849 | ... | 3850 | 0 ... 1 s_{n-1}/S | 3851 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3852 3853 with S = \sum_{i=1}^n s_i^2 3854 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3855 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3856 3857 - QR decomposition of constraints otherwise 3858 */ 3859 if (qr_needed) { 3860 /* space to store Q */ 3861 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3862 /* first we issue queries for optimal work */ 3863 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3864 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3865 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3866 lqr_work = -1; 3867 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3868 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3869 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3870 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3871 lgqr_work = -1; 3872 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3873 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3874 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3875 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3876 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3877 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3878 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3879 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3880 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3881 /* array to store scaling factors for reflectors */ 3882 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3883 /* array to store rhs and solution of triangular solver */ 3884 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3885 /* allocating workspace for check */ 3886 if (pcbddc->dbg_flag) { 3887 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3888 } 3889 } 3890 /* array to store whether a node is primal or not */ 3891 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3892 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3893 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3894 if (i != total_primal_vertices) { 3895 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3896 } 3897 for (i=0;i<total_primal_vertices;i++) { 3898 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3899 } 3900 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3901 3902 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3903 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3904 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3905 if (PetscBTLookup(change_basis,total_counts)) { 3906 /* get constraint info */ 3907 primal_dofs = constraints_n[total_counts]; 3908 dual_dofs = size_of_constraint-primal_dofs; 3909 3910 if (pcbddc->dbg_flag) { 3911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3912 } 3913 3914 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3915 3916 /* copy quadrature constraints for change of basis check */ 3917 if (pcbddc->dbg_flag) { 3918 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3919 } 3920 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3921 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3922 3923 /* compute QR decomposition of constraints */ 3924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3925 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3927 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3928 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3929 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3930 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3931 3932 /* explictly compute R^-T */ 3933 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3934 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3935 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3938 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3940 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3941 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3942 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3943 3944 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3948 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3949 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3950 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3951 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3953 3954 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3955 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3956 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3959 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3961 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3964 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3965 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3966 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3967 3968 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3969 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3970 /* insert cols for primal dofs */ 3971 for (j=0;j<primal_dofs;j++) { 3972 start_vals = &qr_basis[j*size_of_constraint]; 3973 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3974 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3975 } 3976 /* insert cols for dual dofs */ 3977 for (j=0,k=0;j<dual_dofs;k++) { 3978 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3979 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3980 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3981 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3982 j++; 3983 } 3984 } 3985 3986 /* check change of basis */ 3987 if (pcbddc->dbg_flag) { 3988 PetscInt ii,jj; 3989 PetscBool valid_qr=PETSC_TRUE; 3990 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3995 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3996 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3997 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3998 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3999 for (jj=0;jj<size_of_constraint;jj++) { 4000 for (ii=0;ii<primal_dofs;ii++) { 4001 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4002 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4003 } 4004 } 4005 if (!valid_qr) { 4006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4007 for (jj=0;jj<size_of_constraint;jj++) { 4008 for (ii=0;ii<primal_dofs;ii++) { 4009 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4010 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4011 } 4012 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4013 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4014 } 4015 } 4016 } 4017 } else { 4018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4019 } 4020 } 4021 } else { /* simple transformation block */ 4022 PetscInt row,col; 4023 PetscScalar val,norm; 4024 4025 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4026 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4027 for (j=0;j<size_of_constraint;j++) { 4028 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4029 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4030 if (!PetscBTLookup(is_primal,row_B)) { 4031 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4032 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4033 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4034 } else { 4035 for (k=0;k<size_of_constraint;k++) { 4036 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4037 if (row != col) { 4038 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4039 } else { 4040 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4041 } 4042 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4043 } 4044 } 4045 } 4046 if (pcbddc->dbg_flag) { 4047 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4048 } 4049 } 4050 } else { 4051 if (pcbddc->dbg_flag) { 4052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4053 } 4054 } 4055 } 4056 4057 /* free workspace */ 4058 if (qr_needed) { 4059 if (pcbddc->dbg_flag) { 4060 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4061 } 4062 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4063 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4064 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4065 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4066 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4067 } 4068 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4069 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4070 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4071 4072 /* assembling of global change of variable */ 4073 { 4074 Mat tmat; 4075 PetscInt bs; 4076 4077 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4078 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4079 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4080 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4081 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4082 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4083 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4084 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4085 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4086 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4087 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4088 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4089 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4090 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4091 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4093 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4094 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4095 } 4096 /* check */ 4097 if (pcbddc->dbg_flag) { 4098 PetscReal error; 4099 Vec x,x_change; 4100 4101 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4102 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4103 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4104 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4105 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4106 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4107 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4108 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4109 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4110 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4111 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4112 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4115 ierr = VecDestroy(&x);CHKERRQ(ierr); 4116 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4117 } 4118 4119 /* adapt sub_schurs computed (if any) */ 4120 if (pcbddc->use_deluxe_scaling) { 4121 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4122 if (sub_schurs->S_Ej_all) { 4123 Mat S_new,tmat; 4124 ISLocalToGlobalMapping NtoSall; 4125 IS is_all_N,is_V,is_V_Sall; 4126 const PetscScalar *array; 4127 const PetscInt *idxs_V,*idxs_all; 4128 PetscInt i,n_V; 4129 4130 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4131 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4132 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4133 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4134 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4135 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4136 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4137 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4138 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4139 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4140 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4141 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4142 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4143 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4144 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4145 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4146 for (i=0;i<n_V;i++) { 4147 PetscScalar val; 4148 PetscInt idx; 4149 4150 idx = idxs_V[i]; 4151 val = array[idxs_all[idxs_V[i]]]; 4152 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4153 } 4154 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 sub_schurs->S_Ej_all = S_new; 4157 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4158 if (sub_schurs->sum_S_Ej_all) { 4159 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4160 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4161 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4162 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4163 sub_schurs->sum_S_Ej_all = S_new; 4164 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4165 } 4166 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4167 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4168 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4169 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4170 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4171 } 4172 } 4173 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4174 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4175 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4176 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4177 } 4178 4179 /* set up change of basis context */ 4180 if (pcbddc->ChangeOfBasisMatrix) { 4181 PCBDDCChange_ctx change_ctx; 4182 4183 if (!pcbddc->new_global_mat) { 4184 PetscInt global_size,local_size; 4185 4186 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4187 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4188 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4189 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4190 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4191 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4192 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4193 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4194 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4195 } else { 4196 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4197 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4198 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4199 } 4200 if (!pcbddc->user_ChangeOfBasisMatrix) { 4201 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4202 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4203 } else { 4204 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4205 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4206 } 4207 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4208 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4209 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4210 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4211 } else { 4212 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4213 } 4214 4215 /* add pressure dofs to set of primal nodes for numbering purposes */ 4216 for (i=0;i<pcbddc->benign_n;i++) { 4217 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4218 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4219 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4220 pcbddc->local_primal_size_cc++; 4221 pcbddc->local_primal_size++; 4222 } 4223 4224 /* check if a new primal space has been introduced (also take into account benign trick) */ 4225 pcbddc->new_primal_space_local = PETSC_TRUE; 4226 if (olocal_primal_size == pcbddc->local_primal_size) { 4227 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4228 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4229 if (!pcbddc->new_primal_space_local) { 4230 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4231 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4232 } 4233 } 4234 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4235 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4236 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4237 4238 /* flush dbg viewer */ 4239 if (pcbddc->dbg_flag) { 4240 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4241 } 4242 4243 /* free workspace */ 4244 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4245 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4246 if (!pcbddc->adaptive_selection) { 4247 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4248 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4249 } else { 4250 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4251 pcbddc->adaptive_constraints_idxs_ptr, 4252 pcbddc->adaptive_constraints_data_ptr, 4253 pcbddc->adaptive_constraints_idxs, 4254 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4255 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4256 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4257 } 4258 PetscFunctionReturn(0); 4259 } 4260 4261 #undef __FUNCT__ 4262 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4263 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4264 { 4265 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4266 PC_IS *pcis = (PC_IS*)pc->data; 4267 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4268 PetscInt ierr,i,vertex_size,N; 4269 PetscViewer viewer=pcbddc->dbg_viewer; 4270 4271 PetscFunctionBegin; 4272 /* Reset previously computed graph */ 4273 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4274 /* Init local Graph struct */ 4275 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4276 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4277 4278 /* Check validity of the csr graph passed in by the user */ 4279 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4280 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4281 } 4282 4283 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4284 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4285 PetscInt *xadj,*adjncy; 4286 PetscInt nvtxs; 4287 PetscBool flg_row=PETSC_FALSE; 4288 4289 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4290 if (flg_row) { 4291 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4292 pcbddc->computed_rowadj = PETSC_TRUE; 4293 } 4294 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4295 } 4296 if (pcbddc->dbg_flag) { 4297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4298 } 4299 4300 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4301 vertex_size = 1; 4302 if (pcbddc->user_provided_isfordofs) { 4303 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4304 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4305 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4306 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4307 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4308 } 4309 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4310 pcbddc->n_ISForDofs = 0; 4311 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4312 } 4313 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4314 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4315 } else { 4316 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4317 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4318 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4319 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4320 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4321 } 4322 } 4323 } 4324 4325 /* Setup of Graph */ 4326 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4327 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4328 } 4329 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4330 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4331 } 4332 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4333 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4334 } 4335 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4336 4337 /* attach info on disconnected subdomains if present */ 4338 if (pcbddc->n_local_subs) { 4339 PetscInt *local_subs; 4340 4341 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4342 for (i=0;i<pcbddc->n_local_subs;i++) { 4343 const PetscInt *idxs; 4344 PetscInt nl,j; 4345 4346 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4347 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4348 for (j=0;j<nl;j++) { 4349 local_subs[idxs[j]] = i; 4350 } 4351 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4352 } 4353 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4354 pcbddc->mat_graph->local_subs = local_subs; 4355 } 4356 4357 /* Graph's connected components analysis */ 4358 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4359 4360 /* print some info to stdout */ 4361 if (pcbddc->dbg_flag) { 4362 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4363 } 4364 4365 /* mark topography has done */ 4366 pcbddc->recompute_topography = PETSC_FALSE; 4367 PetscFunctionReturn(0); 4368 } 4369 4370 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4371 #undef __FUNCT__ 4372 #define __FUNCT__ "PCBDDCSubsetNumbering" 4373 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4374 { 4375 PetscSF sf; 4376 PetscLayout map; 4377 const PetscInt *idxs; 4378 PetscInt *leaf_data,*root_data,*gidxs; 4379 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4380 PetscInt n_n,nlocals,start,first_index; 4381 PetscMPIInt commsize; 4382 PetscBool first_found; 4383 PetscErrorCode ierr; 4384 4385 PetscFunctionBegin; 4386 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4387 if (subset_mult) { 4388 PetscCheckSameComm(subset,1,subset_mult,2); 4389 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4390 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4391 } 4392 /* create workspace layout for computing global indices of subset */ 4393 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4394 lbounds[0] = lbounds[1] = 0; 4395 for (i=0;i<n;i++) { 4396 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4397 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4398 } 4399 lbounds[0] = -lbounds[0]; 4400 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4401 gbounds[0] = -gbounds[0]; 4402 N = gbounds[1] - gbounds[0] + 1; 4403 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4404 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4405 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4406 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4407 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4408 4409 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4410 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4411 if (subset_mult) { 4412 const PetscInt* idxs_mult; 4413 4414 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4415 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4416 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4417 } else { 4418 for (i=0;i<n;i++) leaf_data[i] = 1; 4419 } 4420 /* local size of new subset */ 4421 n_n = 0; 4422 for (i=0;i<n;i++) n_n += leaf_data[i]; 4423 4424 /* global indexes in layout */ 4425 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4426 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4427 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4428 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4429 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4430 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4431 4432 /* reduce from leaves to roots */ 4433 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4434 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4435 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4436 4437 /* count indexes in local part of layout */ 4438 nlocals = 0; 4439 first_index = -1; 4440 first_found = PETSC_FALSE; 4441 for (i=0;i<Nl;i++) { 4442 if (!first_found && root_data[i]) { 4443 first_found = PETSC_TRUE; 4444 first_index = i; 4445 } 4446 nlocals += root_data[i]; 4447 } 4448 4449 /* cumulative of number of indexes and size of subset without holes */ 4450 #if defined(PETSC_HAVE_MPI_EXSCAN) 4451 start = 0; 4452 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4453 #else 4454 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4455 start = start-nlocals; 4456 #endif 4457 4458 if (N_n) { /* compute total size of new subset if requested */ 4459 *N_n = start + nlocals; 4460 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4461 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4462 } 4463 4464 /* adapt root data with cumulative */ 4465 if (first_found) { 4466 PetscInt old_index; 4467 4468 root_data[first_index] += start; 4469 old_index = first_index; 4470 for (i=first_index+1;i<Nl;i++) { 4471 if (root_data[i]) { 4472 root_data[i] += root_data[old_index]; 4473 old_index = i; 4474 } 4475 } 4476 } 4477 4478 /* from roots to leaves */ 4479 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4480 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4481 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4482 4483 /* create new IS with global indexes without holes */ 4484 if (subset_mult) { 4485 const PetscInt* idxs_mult; 4486 PetscInt cum; 4487 4488 cum = 0; 4489 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4490 for (i=0;i<n;i++) { 4491 PetscInt j; 4492 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4493 } 4494 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4495 } else { 4496 for (i=0;i<n;i++) { 4497 gidxs[i] = leaf_data[i]-1; 4498 } 4499 } 4500 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4501 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4502 PetscFunctionReturn(0); 4503 } 4504 4505 #undef __FUNCT__ 4506 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4507 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4508 { 4509 PetscInt i,j; 4510 PetscScalar *alphas; 4511 PetscErrorCode ierr; 4512 4513 PetscFunctionBegin; 4514 /* this implements stabilized Gram-Schmidt */ 4515 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4516 for (i=0;i<n;i++) { 4517 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4518 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4519 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4520 } 4521 ierr = PetscFree(alphas);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 4525 #undef __FUNCT__ 4526 #define __FUNCT__ "MatISGetSubassemblingPattern" 4527 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4528 { 4529 IS ranks_send_to; 4530 PetscInt n_neighs,*neighs,*n_shared,**shared; 4531 PetscMPIInt size,rank,color; 4532 PetscInt *xadj,*adjncy; 4533 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4534 PetscInt i,local_size,threshold=0; 4535 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4536 PetscSubcomm subcomm; 4537 PetscErrorCode ierr; 4538 4539 PetscFunctionBegin; 4540 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4541 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4542 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4543 4544 /* Get info on mapping */ 4545 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4546 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4547 4548 /* build local CSR graph of subdomains' connectivity */ 4549 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4550 xadj[0] = 0; 4551 xadj[1] = PetscMax(n_neighs-1,0); 4552 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4554 4555 if (threshold) { 4556 PetscInt xadj_count = 0; 4557 for (i=1;i<n_neighs;i++) { 4558 if (n_shared[i] > threshold) { 4559 adjncy[xadj_count] = neighs[i]; 4560 adjncy_wgt[xadj_count] = n_shared[i]; 4561 xadj_count++; 4562 } 4563 } 4564 xadj[1] = xadj_count; 4565 } else { 4566 if (xadj[1]) { 4567 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4568 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4569 } 4570 } 4571 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4572 if (use_square) { 4573 for (i=0;i<xadj[1];i++) { 4574 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4575 } 4576 } 4577 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4578 4579 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4580 4581 /* 4582 Restrict work on active processes only. 4583 */ 4584 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4585 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4586 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4587 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4588 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4589 if (color) { 4590 ierr = PetscFree(xadj);CHKERRQ(ierr); 4591 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4592 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4593 } else { 4594 Mat subdomain_adj; 4595 IS new_ranks,new_ranks_contig; 4596 MatPartitioning partitioner; 4597 PetscInt prank,rstart=0,rend=0; 4598 PetscInt *is_indices,*oldranks; 4599 PetscBool aggregate; 4600 4601 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4603 prank = rank; 4604 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4605 /* 4606 for (i=0;i<size;i++) { 4607 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4608 } 4609 */ 4610 for (i=0;i<xadj[1];i++) { 4611 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4612 } 4613 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4614 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4615 if (aggregate) { 4616 PetscInt lrows,row,ncols,*cols; 4617 PetscMPIInt nrank; 4618 PetscScalar *vals; 4619 4620 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4621 lrows = 0; 4622 if (nrank<redprocs) { 4623 lrows = size/redprocs; 4624 if (nrank<size%redprocs) lrows++; 4625 } 4626 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4627 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4628 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4629 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4630 row = nrank; 4631 ncols = xadj[1]-xadj[0]; 4632 cols = adjncy; 4633 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4634 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4635 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4636 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4637 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4638 ierr = PetscFree(xadj);CHKERRQ(ierr); 4639 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4640 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4641 ierr = PetscFree(vals);CHKERRQ(ierr); 4642 } else { 4643 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4644 } 4645 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4646 4647 /* Partition */ 4648 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4649 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4650 if (use_vwgt) { 4651 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4652 v_wgt[0] = local_size; 4653 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4654 } 4655 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4656 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4657 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4658 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4659 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4660 4661 /* renumber new_ranks to avoid "holes" in new set of processors */ 4662 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4663 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4664 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4665 if (!redprocs) { 4666 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4667 } else { 4668 PetscInt idxs[1]; 4669 PetscMPIInt tag; 4670 MPI_Request *reqs; 4671 4672 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4674 for (i=rstart;i<rend;i++) { 4675 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4676 } 4677 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4678 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4679 ierr = PetscFree(reqs);CHKERRQ(ierr); 4680 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4681 } 4682 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4683 /* clean up */ 4684 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4685 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4686 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4687 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4688 } 4689 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4690 4691 /* assemble parallel IS for sends */ 4692 i = 1; 4693 if (color) i=0; 4694 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4695 /* get back IS */ 4696 *is_sends = ranks_send_to; 4697 PetscFunctionReturn(0); 4698 } 4699 4700 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4701 4702 #undef __FUNCT__ 4703 #define __FUNCT__ "MatISSubassemble" 4704 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4705 { 4706 Mat local_mat; 4707 IS is_sends_internal; 4708 PetscInt rows,cols,new_local_rows; 4709 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4710 PetscBool ismatis,isdense,newisdense,destroy_mat; 4711 ISLocalToGlobalMapping l2gmap; 4712 PetscInt* l2gmap_indices; 4713 const PetscInt* is_indices; 4714 MatType new_local_type; 4715 /* buffers */ 4716 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4717 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4718 PetscInt *recv_buffer_idxs_local; 4719 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4720 /* MPI */ 4721 MPI_Comm comm,comm_n; 4722 PetscSubcomm subcomm; 4723 PetscMPIInt n_sends,n_recvs,commsize; 4724 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4725 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4726 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4727 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4728 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4729 PetscErrorCode ierr; 4730 4731 PetscFunctionBegin; 4732 /* TODO: add missing checks */ 4733 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4734 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4735 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4736 PetscValidLogicalCollectiveInt(mat,nis,7); 4737 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4738 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4739 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4740 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4741 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4742 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4743 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4744 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4745 PetscInt mrows,mcols,mnrows,mncols; 4746 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4747 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4748 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4749 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4750 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4751 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4752 } 4753 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4754 PetscValidLogicalCollectiveInt(mat,bs,0); 4755 /* prepare IS for sending if not provided */ 4756 if (!is_sends) { 4757 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4758 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4759 } else { 4760 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4761 is_sends_internal = is_sends; 4762 } 4763 4764 /* get comm */ 4765 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4766 4767 /* compute number of sends */ 4768 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4769 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4770 4771 /* compute number of receives */ 4772 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4773 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4774 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4775 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4776 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4777 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4778 ierr = PetscFree(iflags);CHKERRQ(ierr); 4779 4780 /* restrict comm if requested */ 4781 subcomm = 0; 4782 destroy_mat = PETSC_FALSE; 4783 if (restrict_comm) { 4784 PetscMPIInt color,subcommsize; 4785 4786 color = 0; 4787 if (restrict_full) { 4788 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4789 } else { 4790 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4791 } 4792 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4793 subcommsize = commsize - subcommsize; 4794 /* check if reuse has been requested */ 4795 if (reuse == MAT_REUSE_MATRIX) { 4796 if (*mat_n) { 4797 PetscMPIInt subcommsize2; 4798 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4799 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4800 comm_n = PetscObjectComm((PetscObject)*mat_n); 4801 } else { 4802 comm_n = PETSC_COMM_SELF; 4803 } 4804 } else { /* MAT_INITIAL_MATRIX */ 4805 PetscMPIInt rank; 4806 4807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4808 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4809 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4810 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4811 comm_n = PetscSubcommChild(subcomm); 4812 } 4813 /* flag to destroy *mat_n if not significative */ 4814 if (color) destroy_mat = PETSC_TRUE; 4815 } else { 4816 comm_n = comm; 4817 } 4818 4819 /* prepare send/receive buffers */ 4820 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4821 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4822 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4823 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4824 if (nis) { 4825 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4826 } 4827 4828 /* Get data from local matrices */ 4829 if (!isdense) { 4830 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4831 /* TODO: See below some guidelines on how to prepare the local buffers */ 4832 /* 4833 send_buffer_vals should contain the raw values of the local matrix 4834 send_buffer_idxs should contain: 4835 - MatType_PRIVATE type 4836 - PetscInt size_of_l2gmap 4837 - PetscInt global_row_indices[size_of_l2gmap] 4838 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4839 */ 4840 } else { 4841 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4842 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4843 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4844 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4845 send_buffer_idxs[1] = i; 4846 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4847 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4848 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4849 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4850 for (i=0;i<n_sends;i++) { 4851 ilengths_vals[is_indices[i]] = len*len; 4852 ilengths_idxs[is_indices[i]] = len+2; 4853 } 4854 } 4855 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4856 /* additional is (if any) */ 4857 if (nis) { 4858 PetscMPIInt psum; 4859 PetscInt j; 4860 for (j=0,psum=0;j<nis;j++) { 4861 PetscInt plen; 4862 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4863 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4864 psum += len+1; /* indices + lenght */ 4865 } 4866 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4867 for (j=0,psum=0;j<nis;j++) { 4868 PetscInt plen; 4869 const PetscInt *is_array_idxs; 4870 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4871 send_buffer_idxs_is[psum] = plen; 4872 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4873 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4874 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4875 psum += plen+1; /* indices + lenght */ 4876 } 4877 for (i=0;i<n_sends;i++) { 4878 ilengths_idxs_is[is_indices[i]] = psum; 4879 } 4880 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4881 } 4882 4883 buf_size_idxs = 0; 4884 buf_size_vals = 0; 4885 buf_size_idxs_is = 0; 4886 for (i=0;i<n_recvs;i++) { 4887 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4888 buf_size_vals += (PetscInt)olengths_vals[i]; 4889 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4890 } 4891 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4892 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4893 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4894 4895 /* get new tags for clean communications */ 4896 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4897 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4898 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4899 4900 /* allocate for requests */ 4901 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4902 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4903 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4904 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4905 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4906 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4907 4908 /* communications */ 4909 ptr_idxs = recv_buffer_idxs; 4910 ptr_vals = recv_buffer_vals; 4911 ptr_idxs_is = recv_buffer_idxs_is; 4912 for (i=0;i<n_recvs;i++) { 4913 source_dest = onodes[i]; 4914 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4915 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4916 ptr_idxs += olengths_idxs[i]; 4917 ptr_vals += olengths_vals[i]; 4918 if (nis) { 4919 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4920 ptr_idxs_is += olengths_idxs_is[i]; 4921 } 4922 } 4923 for (i=0;i<n_sends;i++) { 4924 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4925 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4926 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4927 if (nis) { 4928 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4929 } 4930 } 4931 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4932 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4933 4934 /* assemble new l2g map */ 4935 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4936 ptr_idxs = recv_buffer_idxs; 4937 new_local_rows = 0; 4938 for (i=0;i<n_recvs;i++) { 4939 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4940 ptr_idxs += olengths_idxs[i]; 4941 } 4942 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4943 ptr_idxs = recv_buffer_idxs; 4944 new_local_rows = 0; 4945 for (i=0;i<n_recvs;i++) { 4946 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4947 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4948 ptr_idxs += olengths_idxs[i]; 4949 } 4950 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4951 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4952 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4953 4954 /* infer new local matrix type from received local matrices type */ 4955 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4956 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4957 if (n_recvs) { 4958 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4959 ptr_idxs = recv_buffer_idxs; 4960 for (i=0;i<n_recvs;i++) { 4961 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4962 new_local_type_private = MATAIJ_PRIVATE; 4963 break; 4964 } 4965 ptr_idxs += olengths_idxs[i]; 4966 } 4967 switch (new_local_type_private) { 4968 case MATDENSE_PRIVATE: 4969 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4970 new_local_type = MATSEQAIJ; 4971 bs = 1; 4972 } else { /* if I receive only 1 dense matrix */ 4973 new_local_type = MATSEQDENSE; 4974 bs = 1; 4975 } 4976 break; 4977 case MATAIJ_PRIVATE: 4978 new_local_type = MATSEQAIJ; 4979 bs = 1; 4980 break; 4981 case MATBAIJ_PRIVATE: 4982 new_local_type = MATSEQBAIJ; 4983 break; 4984 case MATSBAIJ_PRIVATE: 4985 new_local_type = MATSEQSBAIJ; 4986 break; 4987 default: 4988 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4989 break; 4990 } 4991 } else { /* by default, new_local_type is seqdense */ 4992 new_local_type = MATSEQDENSE; 4993 bs = 1; 4994 } 4995 4996 /* create MATIS object if needed */ 4997 if (reuse == MAT_INITIAL_MATRIX) { 4998 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4999 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5000 } else { 5001 /* it also destroys the local matrices */ 5002 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5003 } 5004 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5005 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5006 5007 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5008 5009 /* Global to local map of received indices */ 5010 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5011 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5012 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5013 5014 /* restore attributes -> type of incoming data and its size */ 5015 buf_size_idxs = 0; 5016 for (i=0;i<n_recvs;i++) { 5017 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5018 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5019 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5020 } 5021 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5022 5023 /* set preallocation */ 5024 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5025 if (!newisdense) { 5026 PetscInt *new_local_nnz=0; 5027 5028 ptr_vals = recv_buffer_vals; 5029 ptr_idxs = recv_buffer_idxs_local; 5030 if (n_recvs) { 5031 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5032 } 5033 for (i=0;i<n_recvs;i++) { 5034 PetscInt j; 5035 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5036 for (j=0;j<*(ptr_idxs+1);j++) { 5037 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5038 } 5039 } else { 5040 /* TODO */ 5041 } 5042 ptr_idxs += olengths_idxs[i]; 5043 } 5044 if (new_local_nnz) { 5045 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5046 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5047 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5048 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5049 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5050 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5051 } else { 5052 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5053 } 5054 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5055 } else { 5056 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5057 } 5058 5059 /* set values */ 5060 ptr_vals = recv_buffer_vals; 5061 ptr_idxs = recv_buffer_idxs_local; 5062 for (i=0;i<n_recvs;i++) { 5063 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5064 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5065 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5066 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5067 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5068 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5069 } else { 5070 /* TODO */ 5071 } 5072 ptr_idxs += olengths_idxs[i]; 5073 ptr_vals += olengths_vals[i]; 5074 } 5075 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5076 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5077 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5078 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5079 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5080 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5081 5082 #if 0 5083 if (!restrict_comm) { /* check */ 5084 Vec lvec,rvec; 5085 PetscReal infty_error; 5086 5087 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5088 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5089 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5090 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5091 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5092 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5093 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5094 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5095 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5096 } 5097 #endif 5098 5099 /* assemble new additional is (if any) */ 5100 if (nis) { 5101 PetscInt **temp_idxs,*count_is,j,psum; 5102 5103 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5104 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5105 ptr_idxs = recv_buffer_idxs_is; 5106 psum = 0; 5107 for (i=0;i<n_recvs;i++) { 5108 for (j=0;j<nis;j++) { 5109 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5110 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5111 psum += plen; 5112 ptr_idxs += plen+1; /* shift pointer to received data */ 5113 } 5114 } 5115 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5116 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5117 for (i=1;i<nis;i++) { 5118 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5119 } 5120 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5121 ptr_idxs = recv_buffer_idxs_is; 5122 for (i=0;i<n_recvs;i++) { 5123 for (j=0;j<nis;j++) { 5124 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5125 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5126 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5127 ptr_idxs += plen+1; /* shift pointer to received data */ 5128 } 5129 } 5130 for (i=0;i<nis;i++) { 5131 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5132 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5133 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5134 } 5135 ierr = PetscFree(count_is);CHKERRQ(ierr); 5136 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5137 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5138 } 5139 /* free workspace */ 5140 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5141 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5142 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5143 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5144 if (isdense) { 5145 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5146 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5147 } else { 5148 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5149 } 5150 if (nis) { 5151 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5152 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5153 } 5154 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5155 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5156 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5157 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5158 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5159 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5160 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5161 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5162 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5163 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5164 ierr = PetscFree(onodes);CHKERRQ(ierr); 5165 if (nis) { 5166 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5167 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5168 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5169 } 5170 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5171 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5172 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5173 for (i=0;i<nis;i++) { 5174 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5175 } 5176 *mat_n = NULL; 5177 } 5178 PetscFunctionReturn(0); 5179 } 5180 5181 /* temporary hack into ksp private data structure */ 5182 #include <petsc/private/kspimpl.h> 5183 5184 #undef __FUNCT__ 5185 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5186 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5187 { 5188 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5189 PC_IS *pcis = (PC_IS*)pc->data; 5190 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5191 MatNullSpace CoarseNullSpace=NULL; 5192 ISLocalToGlobalMapping coarse_islg; 5193 IS coarse_is,*isarray; 5194 PetscInt i,im_active=-1,active_procs=-1; 5195 PetscInt nis,nisdofs,nisneu,nisvert; 5196 PC pc_temp; 5197 PCType coarse_pc_type; 5198 KSPType coarse_ksp_type; 5199 PetscBool multilevel_requested,multilevel_allowed; 5200 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5201 Mat t_coarse_mat_is; 5202 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5203 PetscMPIInt all_procs; 5204 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5205 PetscBool compute_vecs = PETSC_FALSE; 5206 PetscScalar *array; 5207 PetscErrorCode ierr; 5208 5209 PetscFunctionBegin; 5210 /* Assign global numbering to coarse dofs */ 5211 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5212 PetscInt ocoarse_size; 5213 compute_vecs = PETSC_TRUE; 5214 ocoarse_size = pcbddc->coarse_size; 5215 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5216 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5217 /* see if we can avoid some work */ 5218 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5219 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5220 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5221 PC pc; 5222 PetscBool isbddc; 5223 5224 /* temporary workaround since PCBDDC does not have a reset method so far */ 5225 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5226 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5227 if (isbddc) { 5228 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5229 } else { 5230 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5231 } 5232 coarse_reuse = PETSC_FALSE; 5233 } else { /* we can safely reuse already computed coarse matrix */ 5234 coarse_reuse = PETSC_TRUE; 5235 } 5236 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5237 coarse_reuse = PETSC_FALSE; 5238 } 5239 /* reset any subassembling information */ 5240 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5241 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5242 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5243 coarse_reuse = PETSC_TRUE; 5244 } 5245 5246 /* count "active" (i.e. with positive local size) and "void" processes */ 5247 im_active = !!(pcis->n); 5248 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5249 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5250 void_procs = all_procs-active_procs; 5251 csin_type_simple = PETSC_TRUE; 5252 redist = PETSC_FALSE; 5253 if (pcbddc->current_level && void_procs) { 5254 csin_ml = PETSC_TRUE; 5255 ncoarse_ml = void_procs; 5256 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5257 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5258 csin_ds = PETSC_TRUE; 5259 ncoarse_ds = pcbddc->redistribute_coarse; 5260 redist = PETSC_TRUE; 5261 } else { 5262 csin_ds = PETSC_TRUE; 5263 ncoarse_ds = active_procs; 5264 redist = PETSC_TRUE; 5265 } 5266 } else { 5267 csin_ml = PETSC_FALSE; 5268 ncoarse_ml = all_procs; 5269 if (void_procs) { 5270 csin_ds = PETSC_TRUE; 5271 ncoarse_ds = void_procs; 5272 csin_type_simple = PETSC_FALSE; 5273 } else { 5274 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5275 csin_ds = PETSC_TRUE; 5276 ncoarse_ds = pcbddc->redistribute_coarse; 5277 redist = PETSC_TRUE; 5278 } else { 5279 csin_ds = PETSC_FALSE; 5280 ncoarse_ds = all_procs; 5281 } 5282 } 5283 } 5284 5285 /* 5286 test if we can go multilevel: three conditions must be satisfied: 5287 - we have not exceeded the number of levels requested 5288 - we can actually subassemble the active processes 5289 - we can find a suitable number of MPI processes where we can place the subassembled problem 5290 */ 5291 multilevel_allowed = PETSC_FALSE; 5292 multilevel_requested = PETSC_FALSE; 5293 if (pcbddc->current_level < pcbddc->max_levels) { 5294 multilevel_requested = PETSC_TRUE; 5295 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5296 multilevel_allowed = PETSC_FALSE; 5297 } else { 5298 multilevel_allowed = PETSC_TRUE; 5299 } 5300 } 5301 /* determine number of process partecipating to coarse solver */ 5302 if (multilevel_allowed) { 5303 ncoarse = ncoarse_ml; 5304 csin = csin_ml; 5305 redist = PETSC_FALSE; 5306 } else { 5307 ncoarse = ncoarse_ds; 5308 csin = csin_ds; 5309 } 5310 5311 /* creates temporary l2gmap and IS for coarse indexes */ 5312 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5313 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5314 5315 /* creates temporary MATIS object for coarse matrix */ 5316 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5317 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5318 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5319 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5320 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5321 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5322 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5323 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5324 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5325 5326 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5327 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5328 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5329 const PetscInt *idxs; 5330 ISLocalToGlobalMapping tmap; 5331 5332 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5333 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5334 /* allocate space for temporary storage */ 5335 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5336 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5337 /* allocate for IS array */ 5338 nisdofs = pcbddc->n_ISForDofsLocal; 5339 nisneu = !!pcbddc->NeumannBoundariesLocal; 5340 nisvert = 0; 5341 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5342 nisvert = 1; 5343 } 5344 nis = nisdofs + nisneu + nisvert; 5345 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5346 /* dofs splitting */ 5347 for (i=0;i<nisdofs;i++) { 5348 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5349 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5350 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5351 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5352 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5353 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5354 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5355 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5356 } 5357 /* neumann boundaries */ 5358 if (pcbddc->NeumannBoundariesLocal) { 5359 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5360 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5361 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5362 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5363 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5364 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5365 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5366 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5367 } 5368 /* primal vertices (benign) */ 5369 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5370 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5371 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5372 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5373 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5374 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5375 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5376 } 5377 /* free memory */ 5378 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5379 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5380 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5381 } else { 5382 nis = 0; 5383 nisdofs = 0; 5384 nisneu = 0; 5385 nisvert = 0; 5386 isarray = NULL; 5387 } 5388 /* destroy no longer needed map */ 5389 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5390 5391 /* restrict on coarse candidates (if needed) */ 5392 coarse_mat_is = NULL; 5393 if (csin) { 5394 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5395 if (redist) { 5396 PetscMPIInt rank; 5397 PetscInt spc,n_spc_p1,dest[1],destsize; 5398 5399 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5400 spc = active_procs/ncoarse; 5401 n_spc_p1 = active_procs%ncoarse; 5402 if (im_active) { 5403 destsize = 1; 5404 if (rank > n_spc_p1*(spc+1)-1) { 5405 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5406 } else { 5407 dest[0] = rank/(spc+1); 5408 } 5409 } else { 5410 destsize = 0; 5411 } 5412 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5413 } else if (csin_type_simple) { 5414 PetscMPIInt rank; 5415 PetscInt issize,isidx; 5416 5417 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5418 if (im_active) { 5419 issize = 1; 5420 isidx = (PetscInt)rank; 5421 } else { 5422 issize = 0; 5423 isidx = -1; 5424 } 5425 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5426 } else { /* get a suitable subassembling pattern from MATIS code */ 5427 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5428 } 5429 5430 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5431 if (!redist || ncoarse <= void_procs) { 5432 PetscInt ncoarse_cand,tissize,*nisindices; 5433 PetscInt *coarse_candidates; 5434 const PetscInt* tisindices; 5435 5436 /* get coarse candidates' ranks in pc communicator */ 5437 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5438 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5439 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5440 if (!coarse_candidates[i]) { 5441 coarse_candidates[ncoarse_cand++]=i; 5442 } 5443 } 5444 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5445 5446 5447 if (pcbddc->dbg_flag) { 5448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5449 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5450 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5452 for (i=0;i<ncoarse_cand;i++) { 5453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5454 } 5455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5456 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5457 } 5458 /* shift the pattern on coarse candidates */ 5459 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5460 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5461 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5462 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5463 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5464 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5465 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5466 } 5467 if (pcbddc->dbg_flag) { 5468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5470 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5472 } 5473 } 5474 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5475 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5476 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5477 } else { /* this is the last level, so use just receiving processes in subcomm */ 5478 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5479 } 5480 } else { 5481 if (pcbddc->dbg_flag) { 5482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5485 } 5486 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5487 coarse_mat_is = t_coarse_mat_is; 5488 } 5489 5490 /* create local to global scatters for coarse problem */ 5491 if (compute_vecs) { 5492 PetscInt lrows; 5493 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5494 if (coarse_mat_is) { 5495 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5496 } else { 5497 lrows = 0; 5498 } 5499 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5500 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5501 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5502 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5503 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5504 } 5505 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5506 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5507 5508 /* set defaults for coarse KSP and PC */ 5509 if (multilevel_allowed) { 5510 coarse_ksp_type = KSPRICHARDSON; 5511 coarse_pc_type = PCBDDC; 5512 } else { 5513 coarse_ksp_type = KSPPREONLY; 5514 coarse_pc_type = PCREDUNDANT; 5515 } 5516 5517 /* print some info if requested */ 5518 if (pcbddc->dbg_flag) { 5519 if (!multilevel_allowed) { 5520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5521 if (multilevel_requested) { 5522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5523 } else if (pcbddc->max_levels) { 5524 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5525 } 5526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5527 } 5528 } 5529 5530 /* create the coarse KSP object only once with defaults */ 5531 if (coarse_mat_is) { 5532 MatReuse coarse_mat_reuse; 5533 PetscViewer dbg_viewer = NULL; 5534 if (pcbddc->dbg_flag) { 5535 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5536 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5537 } 5538 if (!pcbddc->coarse_ksp) { 5539 char prefix[256],str_level[16]; 5540 size_t len; 5541 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5542 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5543 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5544 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5545 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5546 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5547 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5548 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5549 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5550 /* prefix */ 5551 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5552 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5553 if (!pcbddc->current_level) { 5554 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5555 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5556 } else { 5557 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5558 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5559 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5560 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5561 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5562 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5563 } 5564 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5565 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5566 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5567 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5568 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5569 /* allow user customization */ 5570 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5571 } 5572 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5573 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5574 if (nisdofs) { 5575 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5576 for (i=0;i<nisdofs;i++) { 5577 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5578 } 5579 } 5580 if (nisneu) { 5581 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5582 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5583 } 5584 if (nisvert) { 5585 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5586 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5587 } 5588 5589 /* get some info after set from options */ 5590 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5591 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5592 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5593 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5594 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5595 isbddc = PETSC_FALSE; 5596 } 5597 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5598 if (isredundant) { 5599 KSP inner_ksp; 5600 PC inner_pc; 5601 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5602 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5603 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5604 } 5605 5606 /* assemble coarse matrix */ 5607 if (coarse_reuse) { 5608 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5609 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5610 coarse_mat_reuse = MAT_REUSE_MATRIX; 5611 } else { 5612 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5613 } 5614 if (isbddc || isnn) { 5615 if (isbddc) { /* currently there's no API for this */ 5616 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5617 pcbddc->detect_disconnected = PETSC_TRUE; 5618 } 5619 if (pcbddc->coarsening_ratio > 1) { 5620 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5621 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5622 if (pcbddc->dbg_flag) { 5623 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5624 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5625 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5626 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5627 } 5628 } 5629 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5630 } else { 5631 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5632 coarse_mat = coarse_mat_is; 5633 } 5634 } else { 5635 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5636 } 5637 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5638 5639 /* propagate symmetry info of coarse matrix */ 5640 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5641 if (pc->pmat->symmetric_set) { 5642 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5643 } 5644 if (pc->pmat->hermitian_set) { 5645 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5646 } 5647 if (pc->pmat->spd_set) { 5648 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5649 } 5650 /* set operators */ 5651 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5652 if (pcbddc->dbg_flag) { 5653 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5654 } 5655 } else { /* processes non partecipating to coarse solver (if any) */ 5656 coarse_mat = 0; 5657 } 5658 ierr = PetscFree(isarray);CHKERRQ(ierr); 5659 #if 0 5660 { 5661 PetscViewer viewer; 5662 char filename[256]; 5663 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5664 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5665 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5666 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5667 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5668 } 5669 #endif 5670 5671 /* Compute coarse null space (special handling by BDDC only) */ 5672 #if 0 5673 if (pcbddc->NullSpace) { 5674 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5675 } 5676 #endif 5677 /* hack */ 5678 if (pcbddc->coarse_ksp) { 5679 Vec crhs,csol; 5680 5681 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5682 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5683 if (!csol) { 5684 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5685 } 5686 if (!crhs) { 5687 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5688 } 5689 } 5690 5691 /* compute null space for coarse solver if the benign trick has been requested */ 5692 if (pcbddc->benign_null) { 5693 5694 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5695 for (i=0;i<pcbddc->benign_n;i++) { 5696 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5697 } 5698 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5699 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5700 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5701 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5702 if (coarse_mat) { 5703 Vec nullv; 5704 PetscScalar *array,*array2; 5705 PetscInt nl; 5706 5707 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5708 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5709 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5710 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5711 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5712 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5713 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5714 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5715 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5716 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5717 } 5718 } 5719 5720 if (pcbddc->coarse_ksp) { 5721 PetscBool ispreonly; 5722 5723 if (CoarseNullSpace) { 5724 PetscBool isnull; 5725 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5726 if (0) { 5727 if (isbddc && !pcbddc->benign_saddle_point) { 5728 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5729 } else { 5730 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5731 } 5732 } else { 5733 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5734 } 5735 } 5736 /* setup coarse ksp */ 5737 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5738 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5739 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5740 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5741 KSP check_ksp; 5742 KSPType check_ksp_type; 5743 PC check_pc; 5744 Vec check_vec,coarse_vec; 5745 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5746 PetscInt its; 5747 PetscBool compute_eigs; 5748 PetscReal *eigs_r,*eigs_c; 5749 PetscInt neigs; 5750 const char *prefix; 5751 5752 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5753 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5754 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5755 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5756 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5757 if (ispreonly) { 5758 check_ksp_type = KSPPREONLY; 5759 compute_eigs = PETSC_FALSE; 5760 } else { 5761 check_ksp_type = KSPGMRES; 5762 compute_eigs = PETSC_TRUE; 5763 } 5764 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5765 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5766 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5767 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5768 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5769 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5770 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5771 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5772 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5773 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5774 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5775 /* create random vec */ 5776 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5777 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5778 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5779 if (CoarseNullSpace) { 5780 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5781 } 5782 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5783 /* solve coarse problem */ 5784 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5785 if (CoarseNullSpace) { 5786 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5787 } 5788 /* set eigenvalue estimation if preonly has not been requested */ 5789 if (compute_eigs) { 5790 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5791 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5792 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5793 lambda_max = eigs_r[neigs-1]; 5794 lambda_min = eigs_r[0]; 5795 if (pcbddc->use_coarse_estimates) { 5796 if (lambda_max>lambda_min) { 5797 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5798 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5799 } 5800 } 5801 } 5802 5803 /* check coarse problem residual error */ 5804 if (pcbddc->dbg_flag) { 5805 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5806 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5807 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5808 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5809 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5810 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5811 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5812 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5813 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5814 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5815 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5816 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5817 if (CoarseNullSpace) { 5818 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5819 } 5820 if (compute_eigs) { 5821 PetscReal lambda_max_s,lambda_min_s; 5822 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5823 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5824 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5825 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5826 for (i=0;i<neigs;i++) { 5827 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5828 } 5829 } 5830 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5831 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5832 } 5833 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5834 if (compute_eigs) { 5835 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5836 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5837 } 5838 } 5839 } 5840 /* print additional info */ 5841 if (pcbddc->dbg_flag) { 5842 /* waits until all processes reaches this point */ 5843 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5844 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5846 } 5847 5848 /* free memory */ 5849 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5850 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5851 PetscFunctionReturn(0); 5852 } 5853 5854 #undef __FUNCT__ 5855 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5856 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5857 { 5858 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5859 PC_IS* pcis = (PC_IS*)pc->data; 5860 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5861 IS subset,subset_mult,subset_n; 5862 PetscInt local_size,coarse_size=0; 5863 PetscInt *local_primal_indices=NULL; 5864 const PetscInt *t_local_primal_indices; 5865 PetscErrorCode ierr; 5866 5867 PetscFunctionBegin; 5868 /* Compute global number of coarse dofs */ 5869 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5870 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5871 } 5872 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5873 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5874 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5875 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5876 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5877 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5878 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5879 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5880 if (local_size != pcbddc->local_primal_size) { 5881 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5882 } 5883 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5884 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5885 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5886 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5887 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5888 5889 /* check numbering */ 5890 if (pcbddc->dbg_flag) { 5891 PetscScalar coarsesum,*array,*array2; 5892 PetscInt i; 5893 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5894 5895 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5896 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5897 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5898 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5899 /* counter */ 5900 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5901 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5902 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5903 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5904 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5905 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5906 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5907 for (i=0;i<pcbddc->local_primal_size;i++) { 5908 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5909 } 5910 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5911 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5912 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5913 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5914 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5915 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5916 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5917 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5918 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5919 for (i=0;i<pcis->n;i++) { 5920 if (array[i] != 0.0 && array[i] != array2[i]) { 5921 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5922 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5923 set_error = PETSC_TRUE; 5924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5925 } 5926 } 5927 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5928 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5929 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5930 for (i=0;i<pcis->n;i++) { 5931 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5932 } 5933 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5934 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5935 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5936 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5937 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5939 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5940 PetscInt *gidxs; 5941 5942 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5943 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5945 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5947 for (i=0;i<pcbddc->local_primal_size;i++) { 5948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5949 } 5950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5951 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5952 } 5953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5954 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5955 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5956 } 5957 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5958 /* get back data */ 5959 *coarse_size_n = coarse_size; 5960 *local_primal_indices_n = local_primal_indices; 5961 PetscFunctionReturn(0); 5962 } 5963 5964 #undef __FUNCT__ 5965 #define __FUNCT__ "PCBDDCGlobalToLocal" 5966 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5967 { 5968 IS localis_t; 5969 PetscInt i,lsize,*idxs,n; 5970 PetscScalar *vals; 5971 PetscErrorCode ierr; 5972 5973 PetscFunctionBegin; 5974 /* get indices in local ordering exploiting local to global map */ 5975 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5976 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5977 for (i=0;i<lsize;i++) vals[i] = 1.0; 5978 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5979 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5980 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5981 if (idxs) { /* multilevel guard */ 5982 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5983 } 5984 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5985 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5986 ierr = PetscFree(vals);CHKERRQ(ierr); 5987 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5988 /* now compute set in local ordering */ 5989 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5990 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5991 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5992 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5993 for (i=0,lsize=0;i<n;i++) { 5994 if (PetscRealPart(vals[i]) > 0.5) { 5995 lsize++; 5996 } 5997 } 5998 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5999 for (i=0,lsize=0;i<n;i++) { 6000 if (PetscRealPart(vals[i]) > 0.5) { 6001 idxs[lsize++] = i; 6002 } 6003 } 6004 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6005 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6006 *localis = localis_t; 6007 PetscFunctionReturn(0); 6008 } 6009 6010 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6011 #undef __FUNCT__ 6012 #define __FUNCT__ "PCBDDCMatMult_Private" 6013 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6014 { 6015 PCBDDCChange_ctx change_ctx; 6016 PetscErrorCode ierr; 6017 6018 PetscFunctionBegin; 6019 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6020 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6021 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6022 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6023 PetscFunctionReturn(0); 6024 } 6025 6026 #undef __FUNCT__ 6027 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6028 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6029 { 6030 PCBDDCChange_ctx change_ctx; 6031 PetscErrorCode ierr; 6032 6033 PetscFunctionBegin; 6034 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6035 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6036 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6037 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6038 PetscFunctionReturn(0); 6039 } 6040 6041 #undef __FUNCT__ 6042 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6043 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6044 { 6045 PC_IS *pcis=(PC_IS*)pc->data; 6046 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6047 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6048 Mat S_j; 6049 PetscInt *used_xadj,*used_adjncy; 6050 PetscBool free_used_adj; 6051 PetscErrorCode ierr; 6052 6053 PetscFunctionBegin; 6054 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6055 free_used_adj = PETSC_FALSE; 6056 if (pcbddc->sub_schurs_layers == -1) { 6057 used_xadj = NULL; 6058 used_adjncy = NULL; 6059 } else { 6060 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6061 used_xadj = pcbddc->mat_graph->xadj; 6062 used_adjncy = pcbddc->mat_graph->adjncy; 6063 } else if (pcbddc->computed_rowadj) { 6064 used_xadj = pcbddc->mat_graph->xadj; 6065 used_adjncy = pcbddc->mat_graph->adjncy; 6066 } else { 6067 PetscBool flg_row=PETSC_FALSE; 6068 const PetscInt *xadj,*adjncy; 6069 PetscInt nvtxs; 6070 6071 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6072 if (flg_row) { 6073 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6074 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6075 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6076 free_used_adj = PETSC_TRUE; 6077 } else { 6078 pcbddc->sub_schurs_layers = -1; 6079 used_xadj = NULL; 6080 used_adjncy = NULL; 6081 } 6082 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6083 } 6084 } 6085 6086 /* setup sub_schurs data */ 6087 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6088 if (!sub_schurs->schur_explicit) { 6089 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6090 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6091 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6092 } else { 6093 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6094 PetscBool isseqaij; 6095 PetscInt benign_n; 6096 6097 if (!pcbddc->use_vertices && reuse_solvers) { 6098 PetscInt n_vertices; 6099 6100 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6101 reuse_solvers = (PetscBool)!n_vertices; 6102 } 6103 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6104 if (!isseqaij) { 6105 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6106 if (matis->A == pcbddc->local_mat) { 6107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6108 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6109 } else { 6110 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6111 } 6112 } 6113 if (!pcbddc->benign_change_explicit) { 6114 benign_n = pcbddc->benign_n; 6115 } else { 6116 benign_n = 0; 6117 } 6118 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6119 } 6120 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6121 6122 /* free adjacency */ 6123 if (free_used_adj) { 6124 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6125 } 6126 PetscFunctionReturn(0); 6127 } 6128 6129 #undef __FUNCT__ 6130 #define __FUNCT__ "PCBDDCInitSubSchurs" 6131 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6132 { 6133 PC_IS *pcis=(PC_IS*)pc->data; 6134 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6135 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6136 PCBDDCGraph graph; 6137 PetscErrorCode ierr; 6138 6139 PetscFunctionBegin; 6140 /* attach interface graph for determining subsets */ 6141 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6142 IS verticesIS,verticescomm; 6143 PetscInt vsize,*idxs; 6144 6145 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6146 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6147 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6149 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6150 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6151 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6152 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6153 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6154 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6155 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6156 /* 6157 if (pcbddc->dbg_flag) { 6158 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6159 } 6160 */ 6161 } else { 6162 graph = pcbddc->mat_graph; 6163 } 6164 6165 /* sub_schurs init */ 6166 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6167 6168 /* free graph struct */ 6169 if (pcbddc->sub_schurs_rebuild) { 6170 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6171 } 6172 PetscFunctionReturn(0); 6173 } 6174 6175 #undef __FUNCT__ 6176 #define __FUNCT__ "PCBDDCCheckOperator" 6177 PetscErrorCode PCBDDCCheckOperator(PC pc) 6178 { 6179 PC_IS *pcis=(PC_IS*)pc->data; 6180 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6181 PetscErrorCode ierr; 6182 6183 PetscFunctionBegin; 6184 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6185 IS zerodiag = NULL; 6186 Mat S_j,B0_B=NULL; 6187 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6188 PetscScalar *p0_check,*array,*array2; 6189 PetscReal norm; 6190 PetscInt i; 6191 6192 /* B0 and B0_B */ 6193 if (zerodiag) { 6194 IS dummy; 6195 6196 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6197 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6198 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6199 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6200 } 6201 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6202 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6203 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6204 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6205 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6206 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6207 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6208 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6209 /* S_j */ 6210 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6211 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6212 6213 /* mimic vector in \widetilde{W}_\Gamma */ 6214 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6215 /* continuous in primal space */ 6216 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6217 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6218 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6219 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6220 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6221 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6222 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6223 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6224 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6225 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6226 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6227 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6228 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6229 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6230 6231 /* assemble rhs for coarse problem */ 6232 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6233 /* local with Schur */ 6234 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6235 if (zerodiag) { 6236 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6237 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6238 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6239 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6240 } 6241 /* sum on primal nodes the local contributions */ 6242 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6244 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6245 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6246 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6247 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6248 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6249 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6250 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6251 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6252 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6253 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6254 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6255 /* scale primal nodes (BDDC sums contibutions) */ 6256 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6257 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6258 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6259 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6260 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6261 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6262 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6263 /* global: \widetilde{B0}_B w_\Gamma */ 6264 if (zerodiag) { 6265 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6266 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6267 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6268 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6269 } 6270 /* BDDC */ 6271 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6272 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6273 6274 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6275 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6276 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6277 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6278 for (i=0;i<pcbddc->benign_n;i++) { 6279 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6280 } 6281 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6282 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6283 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6284 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6285 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6286 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6287 } 6288 PetscFunctionReturn(0); 6289 } 6290