1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 cum2 = cum; 1001 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1002 } 1003 1004 if (mss) { /* multilevel */ 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1006 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1007 } 1008 1009 thresh = pcbddc->adaptive_threshold; 1010 for (i=0;i<sub_schurs->n_subs;i++) { 1011 const PetscInt *idxs; 1012 PetscReal infty = PETSC_MAX_REAL; 1013 PetscInt j,subset_size,eigs_start = 0; 1014 PetscBLASInt B_N; 1015 PetscBool same_data = PETSC_FALSE; 1016 1017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1018 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1019 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1020 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1021 if (sub_schurs->is_hermitian) { 1022 PetscInt j,k; 1023 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1024 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1026 } 1027 for (j=0;j<subset_size;j++) { 1028 for (k=j;k<subset_size;k++) { 1029 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1030 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1031 } 1032 } 1033 } else { 1034 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1036 } 1037 } else { 1038 S = Sarray + cumarray; 1039 St = Starray + cumarray; 1040 } 1041 1042 /* see if we can save some work */ 1043 if (sub_schurs->n_subs == 1) { 1044 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1045 } 1046 1047 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1048 B_neigs = 0; 1049 } else { 1050 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1051 PetscBLASInt B_itype = 1; 1052 PetscBLASInt B_IL, B_IU; 1053 PetscReal eps = -1.0; /* dlamch? */ 1054 PetscInt nmin_s; 1055 1056 if (pcbddc->dbg_flag) { 1057 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1058 } 1059 1060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1061 if (thresh > 1.+PETSC_SMALL) { 1062 1063 /* ask for eigenvalues larger than thresh */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1066 #else 1067 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1068 #endif 1069 } else { 1070 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1071 B_IL = 1; 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1074 #else 1075 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1076 #endif 1077 } 1078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1079 if (B_ierr) { 1080 if (B_ierr < 0 ) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1082 } else if (B_ierr <= B_N) { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1084 } else { 1085 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1086 } 1087 } 1088 1089 if (B_neigs > nmax) { 1090 if (pcbddc->dbg_flag) { 1091 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1092 } 1093 eigs_start = B_neigs -nmax; 1094 B_neigs = nmax; 1095 } 1096 1097 nmin_s = PetscMin(nmin,B_N); 1098 if (B_neigs < nmin_s) { 1099 PetscBLASInt B_neigs2; 1100 1101 B_IU = B_N - B_neigs; 1102 B_IL = B_N - nmin_s + 1; 1103 if (pcbddc->dbg_flag) { 1104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1105 } 1106 if (sub_schurs->is_hermitian) { 1107 PetscInt j; 1108 for (j=0;j<subset_size;j++) { 1109 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1110 } 1111 for (j=0;j<subset_size;j++) { 1112 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1113 } 1114 } else { 1115 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1116 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1117 } 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_COMPLEX) 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1121 #else 1122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1123 #endif 1124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1125 B_neigs += B_neigs2; 1126 } 1127 if (B_ierr) { 1128 if (B_ierr < 0 ) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1130 } else if (B_ierr <= B_N) { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1132 } else { 1133 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 } 1136 if (pcbddc->dbg_flag) { 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1138 for (j=0;j<B_neigs;j++) { 1139 if (eigs[j] == 0.0) { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1141 } else { 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1143 } 1144 } 1145 } 1146 } else { 1147 /* TODO */ 1148 } 1149 } 1150 maxneigs = PetscMax(B_neigs,maxneigs); 1151 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1152 if (B_neigs) { 1153 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1154 1155 if (pcbddc->dbg_flag > 1) { 1156 PetscInt ii; 1157 for (ii=0;ii<B_neigs;ii++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1159 for (j=0;j<B_N;j++) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1162 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1166 #endif 1167 } 1168 } 1169 } 1170 #if 0 1171 for (j=0;j<B_neigs;j++) { 1172 PetscBLASInt Blas_N,Blas_one = 1.0; 1173 PetscScalar norm; 1174 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1175 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1176 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1177 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1178 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1179 } else { 1180 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1181 } 1182 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 } 1184 #endif 1185 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1188 cum++; 1189 } 1190 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1191 /* shift for next computation */ 1192 cumarray += subset_size*subset_size; 1193 } 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1196 } 1197 1198 if (mss) { 1199 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1200 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1201 /* destroy matrices (junk) */ 1202 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1203 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1204 } 1205 if (allocated_S_St) { 1206 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1207 } 1208 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = PetscFree(rwork);CHKERRQ(ierr); 1211 #endif 1212 if (pcbddc->dbg_flag) { 1213 PetscInt maxneigs_r; 1214 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCSetUpSolvers" 1222 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1223 { 1224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1225 PetscScalar *coarse_submat_vals; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1230 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1231 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1232 1233 /* Setup local neumann solver ksp_R */ 1234 /* PCBDDCSetUpLocalScatters should be called first! */ 1235 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1236 1237 /* Change global null space passed in by the user if change of basis has been requested */ 1238 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1239 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1240 } 1241 1242 /* 1243 Setup local correction and local part of coarse basis. 1244 Gives back the dense local part of the coarse matrix in column major ordering 1245 */ 1246 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1247 1248 /* Compute total number of coarse nodes and setup coarse solver */ 1249 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* free */ 1252 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCResetCustomization" 1258 PetscErrorCode PCBDDCResetCustomization(PC pc) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1265 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1266 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1267 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1270 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1271 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1272 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1275 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "PCBDDCResetTopography" 1281 PetscErrorCode PCBDDCResetTopography(PC pc) 1282 { 1283 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1284 PetscInt i; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1291 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1292 for (i=0;i<pcbddc->n_local_subs;i++) { 1293 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1296 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCResetSolvers" 1302 PetscErrorCode PCBDDCResetSolvers(PC pc) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1309 if (pcbddc->coarse_phi_B) { 1310 PetscScalar *array; 1311 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1312 ierr = PetscFree(array);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1316 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1318 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1319 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1328 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1329 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1330 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1332 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1333 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1335 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1336 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1338 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1341 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1342 if (pcbddc->benign_zerodiag_subs) { 1343 PetscInt i; 1344 for (i=0;i<pcbddc->benign_n;i++) { 1345 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1355 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1356 { 1357 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1358 PC_IS *pcis = (PC_IS*)pc->data; 1359 VecType impVecType; 1360 PetscInt n_constraints,n_R,old_size; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (!pcbddc->ConstraintMatrix) { 1365 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1366 } 1367 /* get sizes */ 1368 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1369 n_R = pcis->n - pcbddc->n_vertices; 1370 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1371 /* local work vectors (try to avoid unneeded work)*/ 1372 /* R nodes */ 1373 old_size = -1; 1374 if (pcbddc->vec1_R) { 1375 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1376 } 1377 if (n_R != old_size) { 1378 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1379 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1380 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1381 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1382 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1383 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1384 } 1385 /* local primal dofs */ 1386 old_size = -1; 1387 if (pcbddc->vec1_P) { 1388 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1389 } 1390 if (pcbddc->local_primal_size != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1395 } 1396 /* local explicit constraints */ 1397 old_size = -1; 1398 if (pcbddc->vec1_C) { 1399 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1400 } 1401 if (n_constraints && n_constraints != old_size) { 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1404 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1405 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpCorrection" 1412 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1413 { 1414 PetscErrorCode ierr; 1415 /* pointers to pcis and pcbddc */ 1416 PC_IS* pcis = (PC_IS*)pc->data; 1417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1418 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1419 /* submatrices of local problem */ 1420 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1421 /* submatrices of benign trick */ 1422 Mat B0_V = NULL; 1423 /* submatrices of local coarse problem */ 1424 Mat S_VV,S_CV,S_VC,S_CC; 1425 /* working matrices */ 1426 Mat C_CR; 1427 /* additional working stuff */ 1428 PC pc_R; 1429 Mat F; 1430 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1431 1432 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1433 PetscScalar *work; 1434 PetscInt *idx_V_B; 1435 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1436 PetscInt i,n_R,n_D,n_B; 1437 1438 /* some shortcuts to scalars */ 1439 PetscScalar one=1.0,m_one=-1.0; 1440 1441 PetscFunctionBegin; 1442 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1444 } 1445 1446 /* Set Non-overlapping dimensions */ 1447 n_vertices = pcbddc->n_vertices; 1448 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1449 n_B = pcis->n_B; 1450 n_D = pcis->n - n_B; 1451 n_R = pcis->n - n_vertices; 1452 1453 /* vertices in boundary numbering */ 1454 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1456 if (i != n_vertices) { 1457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1458 } 1459 1460 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1461 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1463 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1465 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1467 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 1471 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1476 lda_rhs = n_R; 1477 need_benign_correction = PETSC_FALSE; 1478 if (isLU || isILU || isCHOL) { 1479 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1480 } else if (sub_schurs->reuse_solver) { 1481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1482 MatFactorType type; 1483 1484 F = reuse_solver->F; 1485 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1486 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1487 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1488 need_benign_correction = !!reuse_solver->benign_n; 1489 } else { 1490 F = NULL; 1491 } 1492 1493 /* allocate workspace */ 1494 n = 0; 1495 if (n_constraints) { 1496 n += lda_rhs*n_constraints; 1497 } 1498 if (n_vertices) { 1499 n = PetscMax(2*lda_rhs*n_vertices,n); 1500 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1501 } 1502 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1503 1504 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1505 if (n_constraints) { 1506 Mat M1,M2,M3; 1507 Mat auxmat; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PetscScalar *marr; 1540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1541 1542 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1543 for (i=0;i<n_constraints;i++) { 1544 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1545 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1549 } 1550 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1551 if (need_benign_correction) { 1552 PetscScalar *marr; 1553 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1554 1555 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 for (i=0;i<n_constraints;i++) { 1557 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1558 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 } 1563 ierr = MatDestroy(&B);CHKERRQ(ierr); 1564 } else { 1565 PetscScalar *marr; 1566 1567 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 for (i=0;i<n_constraints;i++) { 1569 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1574 } 1575 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->switch_static) { 1578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1581 for (i=0;i<n_constraints;i++) { 1582 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1583 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1588 } 1589 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1591 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1592 } else { 1593 if (lda_rhs != n_R) { 1594 IS dummy; 1595 1596 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1601 pcbddc->local_auxmat2 = local_auxmat2_R; 1602 } 1603 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1604 } 1605 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1606 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1607 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1608 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1609 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1610 if (isCHOL) { 1611 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1612 } else { 1613 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1614 } 1615 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1616 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1618 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1619 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1620 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1621 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1622 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1623 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1624 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1625 } 1626 /* Get submatrices from subdomain matrix */ 1627 if (pcbddc->benign_n) { 1628 IS dummy; 1629 Mat B0_R; 1630 PetscReal norm; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1634 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1635 if (norm > PETSC_SMALL) { 1636 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1637 } 1638 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1639 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1640 } 1641 1642 if (n_vertices) { 1643 IS is_aux; 1644 1645 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1646 IS tis; 1647 1648 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1649 ierr = ISSort(tis);CHKERRQ(ierr); 1650 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 } else { 1653 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1654 } 1655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1656 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1658 if (pcbddc->benign_n) { 1659 IS dummy; 1660 1661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1663 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1664 } 1665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1666 } 1667 1668 /* Matrix of coarse basis functions (local) */ 1669 if (pcbddc->coarse_phi_B) { 1670 PetscInt on_B,on_primal,on_D=n_D; 1671 if (pcbddc->coarse_phi_D) { 1672 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1673 } 1674 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1675 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1676 PetscScalar *marray; 1677 1678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1679 ierr = PetscFree(marray);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1681 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1684 } 1685 } 1686 1687 if (!pcbddc->coarse_phi_B) { 1688 PetscScalar *marray; 1689 1690 n = n_B*pcbddc->local_primal_size; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n += n_D*pcbddc->local_primal_size; 1693 } 1694 if (!pcbddc->symmetric_primal) { 1695 n *= 2; 1696 } 1697 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1702 n += n_D*pcbddc->local_primal_size; 1703 } 1704 if (!pcbddc->symmetric_primal) { 1705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 n = n_B*pcbddc->local_primal_size; 1708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 } 1710 } else { 1711 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1714 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1715 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1716 } 1717 } 1718 } 1719 1720 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1721 p0_lidx_I = NULL; 1722 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1723 const PetscInt *idxs; 1724 1725 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1727 for (i=0;i<pcbddc->benign_n;i++) { 1728 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1729 } 1730 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1731 } 1732 1733 /* vertices */ 1734 if (n_vertices) { 1735 1736 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1737 1738 if (n_R) { 1739 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1740 PetscBLASInt B_N,B_one = 1; 1741 PetscScalar *x,*y; 1742 PetscBool isseqaij; 1743 1744 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1745 if (need_benign_correction) { 1746 ISLocalToGlobalMapping RtoN; 1747 IS is_p0; 1748 PetscInt *idxs_p0,n; 1749 1750 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1751 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1752 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1753 if (n != pcbddc->benign_n) { 1754 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1755 } 1756 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1757 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1758 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1759 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1760 } 1761 1762 if (lda_rhs == n_R) { 1763 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1764 } else { 1765 PetscScalar *av,*array; 1766 const PetscInt *xadj,*adjncy; 1767 PetscInt n; 1768 PetscBool flg_row; 1769 1770 array = work+lda_rhs*n_vertices; 1771 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1772 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1774 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1775 for (i=0;i<n;i++) { 1776 PetscInt j; 1777 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1778 } 1779 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1782 } 1783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1784 if (F) { 1785 if (need_benign_correction) { 1786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1787 PetscScalar *marr; 1788 1789 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1790 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1791 1792 | 0 0 0 | (V) 1793 L = | 0 0 -1 | (P-p0) 1794 | 0 0 -1 | (p0) 1795 1796 */ 1797 for (i=0;i<reuse_solver->benign_n;i++) { 1798 const PetscScalar *vals; 1799 const PetscInt *idxs,*idxs_zero; 1800 PetscInt n,j,nz; 1801 1802 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1804 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1805 for (j=0;j<n;j++) { 1806 PetscScalar val = vals[j]; 1807 PetscInt k,col = idxs[j]; 1808 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1809 } 1810 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1812 } 1813 /* need to correct the rhs */ 1814 for (i=0;i<n_vertices;i++) { 1815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1816 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1818 } 1819 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1820 } 1821 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1822 /* need to correct the solution */ 1823 if (need_benign_correction) { 1824 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1825 PetscScalar *marr; 1826 1827 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1828 for (i=0;i<n_vertices;i++) { 1829 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1830 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 } 1835 } else { 1836 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1840 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1842 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1843 } 1844 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1845 } 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 /* S_VV and S_CV */ 1848 if (n_constraints) { 1849 Mat B; 1850 1851 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1852 for (i=0;i<n_vertices;i++) { 1853 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1854 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1859 } 1860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1861 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1862 ierr = MatDestroy(&B);CHKERRQ(ierr); 1863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1864 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1865 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 } 1870 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1871 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1872 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1873 } 1874 if (lda_rhs != n_R) { 1875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1878 } 1879 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1880 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr,*sums; 1884 1885 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1886 ierr = MatDenseGetArray(S_VVt,&marr); 1887 for (i=0;i<reuse_solver->benign_n;i++) { 1888 const PetscScalar *vals; 1889 const PetscInt *idxs,*idxs_zero; 1890 PetscInt n,j,nz; 1891 1892 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1893 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1894 for (j=0;j<n_vertices;j++) { 1895 PetscInt k; 1896 sums[j] = 0.; 1897 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1898 } 1899 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1900 for (j=0;j<n;j++) { 1901 PetscScalar val = vals[j]; 1902 PetscInt k; 1903 for (k=0;k<n_vertices;k++) { 1904 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1905 } 1906 } 1907 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1908 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1909 } 1910 ierr = PetscFree(sums);CHKERRQ(ierr); 1911 ierr = MatDenseRestoreArray(S_VVt,&marr); 1912 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1913 } 1914 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1919 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1921 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1922 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1925 } 1926 for (i=0;i<pcbddc->benign_n;i++) { 1927 const PetscScalar *vals; 1928 const PetscInt *idxs; 1929 PetscInt n,j,primal_idx; 1930 1931 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1932 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1933 for (j=0;j<n;j++) { 1934 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1935 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1936 } 1937 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 } 1939 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1940 1941 /* coarse basis functions */ 1942 for (i=0;i<n_vertices;i++) { 1943 PetscScalar *y; 1944 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 y[n_B*i+idx_V_B[i]] = 1.0; 1951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1952 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1953 1954 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1955 PetscInt j; 1956 1957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1962 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 } 1965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1966 } 1967 /* if n_R == 0 the object is not destroyed */ 1968 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1969 } 1970 1971 if (n_constraints) { 1972 Mat B; 1973 1974 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1975 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1976 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1977 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1978 if (n_vertices) { 1979 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1980 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1981 } else { 1982 Mat S_VCt; 1983 1984 if (lda_rhs != n_R) { 1985 ierr = MatDestroy(&B);CHKERRQ(ierr); 1986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1988 } 1989 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1992 } 1993 } 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<n_constraints;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2006 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2007 PetscInt j; 2008 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2014 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 } 2017 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2018 } 2019 } 2020 if (n_constraints) { 2021 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2022 } 2023 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2024 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2025 2026 /* compute other basis functions for non-symmetric problems */ 2027 if (!pcbddc->symmetric_primal) { 2028 Mat B_V=NULL,B_C=NULL; 2029 PetscScalar *marray; 2030 2031 if (n_constraints) { 2032 Mat S_CCT,C_CRT; 2033 2034 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2035 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2036 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2037 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2038 if (n_vertices) { 2039 Mat S_VCT; 2040 2041 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2042 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2043 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2044 } 2045 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2046 } 2047 if (n_vertices && n_R) { 2048 PetscScalar *av,*marray; 2049 const PetscInt *xadj,*adjncy; 2050 PetscInt n; 2051 PetscBool flg_row; 2052 2053 /* B_V = B_V - A_VR^T */ 2054 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2055 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2056 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2057 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2058 for (i=0;i<n;i++) { 2059 PetscInt j; 2060 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2061 } 2062 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2063 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2064 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2065 } 2066 2067 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2068 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2071 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2072 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2074 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2075 } 2076 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2078 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2079 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2080 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2081 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2083 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2084 } 2085 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2086 /* coarse basis functions */ 2087 for (i=0;i<pcbddc->local_primal_size;i++) { 2088 PetscScalar *y; 2089 2090 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2091 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2092 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 if (i<n_vertices) { 2096 y[n_B*i+idx_V_B[i]] = 1.0; 2097 } 2098 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2100 2101 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2102 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2103 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2104 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2107 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2108 } 2109 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2110 } 2111 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2112 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2113 } 2114 /* free memory */ 2115 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2116 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2117 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2118 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2119 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2120 ierr = PetscFree(work);CHKERRQ(ierr); 2121 if (n_vertices) { 2122 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2123 } 2124 if (n_constraints) { 2125 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2126 } 2127 /* Checking coarse_sub_mat and coarse basis functios */ 2128 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2129 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2130 if (pcbddc->dbg_flag) { 2131 Mat coarse_sub_mat; 2132 Mat AUXMAT,TM1,TM2,TM3,TM4; 2133 Mat coarse_phi_D,coarse_phi_B; 2134 Mat coarse_psi_D,coarse_psi_B; 2135 Mat A_II,A_BB,A_IB,A_BI; 2136 Mat C_B,CPHI; 2137 IS is_dummy; 2138 Vec mones; 2139 MatType checkmattype=MATSEQAIJ; 2140 PetscReal real_value; 2141 2142 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2143 Mat A; 2144 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2145 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2146 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2147 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2148 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2149 ierr = MatDestroy(&A);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2152 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2153 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2154 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2155 } 2156 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2157 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2158 if (!pcbddc->symmetric_primal) { 2159 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2160 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2161 } 2162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2163 2164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2167 if (!pcbddc->symmetric_primal) { 2168 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2169 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2171 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2172 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2174 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2178 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2182 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2183 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2184 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2185 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2186 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2187 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2188 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2189 } 2190 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2191 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2192 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2193 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2194 if (pcbddc->benign_n) { 2195 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2196 PetscScalar *data,*data2; 2197 PetscInt j; 2198 2199 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2200 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2201 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2202 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2203 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2204 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2205 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2206 for (j=0;j<pcbddc->benign_n;j++) { 2207 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2208 for (i=0;i<pcbddc->local_primal_size;i++) { 2209 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2210 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2211 } 2212 } 2213 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2214 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2215 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2216 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2217 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2218 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2219 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2220 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2221 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2222 } 2223 #if 0 2224 { 2225 PetscViewer viewer; 2226 char filename[256]; 2227 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2228 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2229 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2230 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2231 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2232 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2233 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2235 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2236 if (pcbddc->coarse_phi_D) { 2237 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2238 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2239 } 2240 if (pcbddc->coarse_psi_B) { 2241 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2242 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2243 } 2244 if (pcbddc->coarse_phi_B) { 2245 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2246 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2247 } 2248 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2249 } 2250 #endif 2251 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2252 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2255 2256 /* check constraints */ 2257 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2258 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2259 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2260 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2261 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2262 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2263 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2264 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2266 if (!pcbddc->symmetric_primal) { 2267 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2268 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2269 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2270 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2272 } 2273 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2274 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2275 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2276 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2277 } 2278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2279 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2280 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2281 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2282 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2283 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2284 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2285 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2287 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2288 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2289 if (!pcbddc->symmetric_primal) { 2290 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2291 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2292 } 2293 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2294 } 2295 /* get back data */ 2296 *coarse_submat_vals_n = coarse_submat_vals; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2302 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2303 { 2304 Mat *work_mat; 2305 IS isrow_s,iscol_s; 2306 PetscBool rsorted,csorted; 2307 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2312 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2313 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2314 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2315 2316 if (!rsorted) { 2317 const PetscInt *idxs; 2318 PetscInt *idxs_sorted,i; 2319 2320 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2321 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2322 for (i=0;i<rsize;i++) { 2323 idxs_perm_r[i] = i; 2324 } 2325 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2326 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2327 for (i=0;i<rsize;i++) { 2328 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2329 } 2330 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2331 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2332 } else { 2333 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2334 isrow_s = isrow; 2335 } 2336 2337 if (!csorted) { 2338 if (isrow == iscol) { 2339 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2340 iscol_s = isrow_s; 2341 } else { 2342 const PetscInt *idxs; 2343 PetscInt *idxs_sorted,i; 2344 2345 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2346 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2347 for (i=0;i<csize;i++) { 2348 idxs_perm_c[i] = i; 2349 } 2350 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2351 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2352 for (i=0;i<csize;i++) { 2353 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2354 } 2355 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2356 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2357 } 2358 } else { 2359 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2360 iscol_s = iscol; 2361 } 2362 2363 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2364 2365 if (!rsorted || !csorted) { 2366 Mat new_mat; 2367 IS is_perm_r,is_perm_c; 2368 2369 if (!rsorted) { 2370 PetscInt *idxs_r,i; 2371 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2372 for (i=0;i<rsize;i++) { 2373 idxs_r[idxs_perm_r[i]] = i; 2374 } 2375 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2376 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2377 } else { 2378 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2379 } 2380 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2381 2382 if (!csorted) { 2383 if (isrow_s == iscol_s) { 2384 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2385 is_perm_c = is_perm_r; 2386 } else { 2387 PetscInt *idxs_c,i; 2388 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2389 for (i=0;i<csize;i++) { 2390 idxs_c[idxs_perm_c[i]] = i; 2391 } 2392 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2394 } 2395 } else { 2396 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2397 } 2398 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2399 2400 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2401 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2402 work_mat[0] = new_mat; 2403 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2404 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2405 } 2406 2407 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2408 *B = work_mat[0]; 2409 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2410 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2411 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2417 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2418 { 2419 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2420 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2421 Mat new_mat; 2422 IS is_local,is_global; 2423 PetscInt local_size; 2424 PetscBool isseqaij; 2425 PetscErrorCode ierr; 2426 2427 PetscFunctionBegin; 2428 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2429 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2430 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2431 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2432 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2433 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2434 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2435 2436 /* check */ 2437 if (pcbddc->dbg_flag) { 2438 Vec x,x_change; 2439 PetscReal error; 2440 2441 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2442 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2443 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2444 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2445 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2446 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2447 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2449 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2450 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2453 ierr = VecDestroy(&x);CHKERRQ(ierr); 2454 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2455 } 2456 2457 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2458 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2459 if (isseqaij) { 2460 Mat M; 2461 2462 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2463 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2464 ierr = MatDestroy(&M);CHKERRQ(ierr); 2465 } else { 2466 Mat work_mat,M; 2467 2468 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2469 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2470 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2471 ierr = MatDestroy(&M);CHKERRQ(ierr); 2472 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2473 } 2474 if (matis->A->symmetric_set) { 2475 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2476 #if !defined(PETSC_USE_COMPLEX) 2477 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2478 #endif 2479 } 2480 /* 2481 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2482 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2483 */ 2484 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2490 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2491 { 2492 PC_IS* pcis = (PC_IS*)(pc->data); 2493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2494 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2495 PetscInt *idx_R_local=NULL; 2496 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2497 PetscInt vbs,bs; 2498 PetscBT bitmask=NULL; 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 /* 2503 No need to setup local scatters if 2504 - primal space is unchanged 2505 AND 2506 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2507 AND 2508 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2509 */ 2510 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2511 PetscFunctionReturn(0); 2512 } 2513 /* destroy old objects */ 2514 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2515 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2516 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2517 /* Set Non-overlapping dimensions */ 2518 n_B = pcis->n_B; 2519 n_D = pcis->n - n_B; 2520 n_vertices = pcbddc->n_vertices; 2521 2522 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2523 2524 /* create auxiliary bitmask and allocate workspace */ 2525 if (!sub_schurs->reuse_solver) { 2526 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2527 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2528 for (i=0;i<n_vertices;i++) { 2529 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2530 } 2531 2532 for (i=0, n_R=0; i<pcis->n; i++) { 2533 if (!PetscBTLookup(bitmask,i)) { 2534 idx_R_local[n_R++] = i; 2535 } 2536 } 2537 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2538 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2539 2540 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2541 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2542 } 2543 2544 /* Block code */ 2545 vbs = 1; 2546 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2547 if (bs>1 && !(n_vertices%bs)) { 2548 PetscBool is_blocked = PETSC_TRUE; 2549 PetscInt *vary; 2550 if (!sub_schurs->reuse_solver) { 2551 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2552 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2553 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2554 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2555 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2556 for (i=0; i<pcis->n/bs; i++) { 2557 if (vary[i]!=0 && vary[i]!=bs) { 2558 is_blocked = PETSC_FALSE; 2559 break; 2560 } 2561 } 2562 ierr = PetscFree(vary);CHKERRQ(ierr); 2563 } else { 2564 /* Verify directly the R set */ 2565 for (i=0; i<n_R/bs; i++) { 2566 PetscInt j,node=idx_R_local[bs*i]; 2567 for (j=1; j<bs; j++) { 2568 if (node != idx_R_local[bs*i+j]-j) { 2569 is_blocked = PETSC_FALSE; 2570 break; 2571 } 2572 } 2573 } 2574 } 2575 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2576 vbs = bs; 2577 for (i=0;i<n_R/vbs;i++) { 2578 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2579 } 2580 } 2581 } 2582 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2583 if (sub_schurs->reuse_solver) { 2584 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2585 2586 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2587 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2588 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2589 reuse_solver->is_R = pcbddc->is_R_local; 2590 } else { 2591 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2592 } 2593 2594 /* print some info if requested */ 2595 if (pcbddc->dbg_flag) { 2596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2598 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2602 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2603 } 2604 2605 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2606 if (!sub_schurs->reuse_solver) { 2607 IS is_aux1,is_aux2; 2608 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2609 2610 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2611 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2612 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 for (i=0; i<n_D; i++) { 2615 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2616 } 2617 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2618 for (i=0, j=0; i<n_R; i++) { 2619 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2620 aux_array1[j++] = i; 2621 } 2622 } 2623 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2624 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2625 for (i=0, j=0; i<n_B; i++) { 2626 if (!PetscBTLookup(bitmask,is_indices[i])) { 2627 aux_array2[j++] = i; 2628 } 2629 } 2630 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2632 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2633 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2634 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2635 2636 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2637 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2638 for (i=0, j=0; i<n_R; i++) { 2639 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2640 aux_array1[j++] = i; 2641 } 2642 } 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2644 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2646 } 2647 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2649 } else { 2650 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2651 IS tis; 2652 PetscInt schur_size; 2653 2654 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2655 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2657 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2658 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2659 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2660 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2661 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2662 } 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2670 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2671 { 2672 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2673 PC_IS *pcis = (PC_IS*)pc->data; 2674 PC pc_temp; 2675 Mat A_RR; 2676 MatReuse reuse; 2677 PetscScalar m_one = -1.0; 2678 PetscReal value; 2679 PetscInt n_D,n_R; 2680 PetscBool use_exact,use_exact_reduced,issbaij; 2681 PetscErrorCode ierr; 2682 /* prefixes stuff */ 2683 char dir_prefix[256],neu_prefix[256],str_level[16]; 2684 size_t len; 2685 2686 PetscFunctionBegin; 2687 2688 /* compute prefixes */ 2689 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2690 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2691 if (!pcbddc->current_level) { 2692 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2693 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2694 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2695 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2696 } else { 2697 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2698 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2699 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2700 len -= 15; /* remove "pc_bddc_coarse_" */ 2701 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2702 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2703 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2704 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2705 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2706 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2707 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2708 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2709 } 2710 2711 /* DIRICHLET PROBLEM */ 2712 if (dirichlet) { 2713 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2714 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2715 Mat A_IIn; 2716 2717 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2718 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2719 pcis->A_II = A_IIn; 2720 } 2721 if (pcbddc->local_mat->symmetric_set) { 2722 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2723 } 2724 /* Matrix for Dirichlet problem is pcis->A_II */ 2725 n_D = pcis->n - pcis->n_B; 2726 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2727 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2728 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2729 /* default */ 2730 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2731 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2732 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2733 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2734 if (issbaij) { 2735 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2736 } else { 2737 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2738 } 2739 /* Allow user's customization */ 2740 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2741 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2742 } 2743 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2744 if (sub_schurs->reuse_solver) { 2745 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2746 2747 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2748 } 2749 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2750 if (!n_D) { 2751 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2752 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2753 } 2754 /* Set Up KSP for Dirichlet problem of BDDC */ 2755 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2756 /* set ksp_D into pcis data */ 2757 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2758 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2759 pcis->ksp_D = pcbddc->ksp_D; 2760 } 2761 2762 /* NEUMANN PROBLEM */ 2763 A_RR = 0; 2764 if (neumann) { 2765 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2766 PetscInt ibs,mbs; 2767 PetscBool issbaij; 2768 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2769 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2770 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2771 if (pcbddc->ksp_R) { /* already created ksp */ 2772 PetscInt nn_R; 2773 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2774 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2775 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2776 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2777 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2778 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2779 reuse = MAT_INITIAL_MATRIX; 2780 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2781 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2782 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2783 reuse = MAT_INITIAL_MATRIX; 2784 } else { /* safe to reuse the matrix */ 2785 reuse = MAT_REUSE_MATRIX; 2786 } 2787 } 2788 /* last check */ 2789 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2790 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2791 reuse = MAT_INITIAL_MATRIX; 2792 } 2793 } else { /* first time, so we need to create the matrix */ 2794 reuse = MAT_INITIAL_MATRIX; 2795 } 2796 /* extract A_RR */ 2797 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2798 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2799 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2800 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2801 if (matis->A == pcbddc->local_mat) { 2802 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2803 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2804 } else { 2805 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2806 } 2807 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2808 if (matis->A == pcbddc->local_mat) { 2809 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2810 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2811 } else { 2812 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2813 } 2814 } 2815 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2816 if (pcbddc->local_mat->symmetric_set) { 2817 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2818 } 2819 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2820 Mat A_RRn; 2821 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2822 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2823 A_RR = A_RRn; 2824 } 2825 if (!pcbddc->ksp_R) { /* create object if not present */ 2826 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2827 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2828 /* default */ 2829 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2830 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2831 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2832 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2833 if (issbaij) { 2834 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2835 } else { 2836 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2837 } 2838 /* Allow user's customization */ 2839 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2840 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2841 } 2842 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2843 if (!n_R) { 2844 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2845 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2846 } 2847 /* Reuse solver if it is present */ 2848 if (sub_schurs->reuse_solver) { 2849 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2850 2851 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2852 } 2853 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2854 /* Set Up KSP for Neumann problem of BDDC */ 2855 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2856 } 2857 /* free Neumann problem's matrix */ 2858 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2859 2860 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2861 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2862 if (pcbddc->dbg_flag) { 2863 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2864 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2866 } 2867 if (dirichlet) { /* Dirichlet */ 2868 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2869 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2870 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2871 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2872 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2873 /* need to be adapted? */ 2874 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2875 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2876 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2877 /* print info */ 2878 if (pcbddc->dbg_flag) { 2879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2881 } 2882 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2883 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2884 } 2885 } 2886 if (neumann) { /* Neumann */ 2887 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2888 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2889 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2890 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2891 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2892 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2893 /* need to be adapted? */ 2894 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2895 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2896 /* print info */ 2897 if (pcbddc->dbg_flag) { 2898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2900 } 2901 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2902 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2903 } 2904 } 2905 } 2906 PetscFunctionReturn(0); 2907 } 2908 2909 #undef __FUNCT__ 2910 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2911 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2912 { 2913 PetscErrorCode ierr; 2914 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2915 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2916 2917 PetscFunctionBegin; 2918 if (!sub_schurs->reuse_solver) { 2919 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2920 } 2921 if (!pcbddc->switch_static) { 2922 if (applytranspose && pcbddc->local_auxmat1) { 2923 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2924 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2925 } 2926 if (!sub_schurs->reuse_solver) { 2927 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2928 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2929 } else { 2930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2931 2932 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2934 } 2935 } else { 2936 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2937 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2938 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2939 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2940 if (applytranspose && pcbddc->local_auxmat1) { 2941 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2942 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2943 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 } 2946 } 2947 if (!sub_schurs->reuse_solver) { 2948 if (applytranspose) { 2949 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2950 } else { 2951 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2952 } 2953 } else { 2954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2955 2956 if (applytranspose) { 2957 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2958 } else { 2959 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2960 } 2961 } 2962 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2963 if (!pcbddc->switch_static) { 2964 if (!sub_schurs->reuse_solver) { 2965 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2966 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2967 } else { 2968 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2969 2970 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2971 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2972 } 2973 if (!applytranspose && pcbddc->local_auxmat1) { 2974 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2975 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2976 } 2977 } else { 2978 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2979 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2980 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2981 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2982 if (!applytranspose && pcbddc->local_auxmat1) { 2983 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2984 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2985 } 2986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2988 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2989 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2990 } 2991 PetscFunctionReturn(0); 2992 } 2993 2994 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2997 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2998 { 2999 PetscErrorCode ierr; 3000 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3001 PC_IS* pcis = (PC_IS*) (pc->data); 3002 const PetscScalar zero = 0.0; 3003 3004 PetscFunctionBegin; 3005 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3006 if (!pcbddc->benign_apply_coarse_only) { 3007 if (applytranspose) { 3008 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3009 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3010 } else { 3011 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3012 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3013 } 3014 } else { 3015 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3016 } 3017 3018 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3019 if (pcbddc->benign_n) { 3020 PetscScalar *array; 3021 PetscInt j; 3022 3023 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3024 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3025 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3026 } 3027 3028 /* start communications from local primal nodes to rhs of coarse solver */ 3029 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3030 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3031 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3032 3033 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3034 /* TODO remove null space when doing multilevel */ 3035 if (pcbddc->coarse_ksp) { 3036 Mat coarse_mat; 3037 Vec rhs,sol; 3038 MatNullSpace nullsp; 3039 3040 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3041 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3042 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3043 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3044 if (nullsp) { 3045 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3046 } 3047 if (applytranspose) { 3048 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3049 } else { 3050 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3051 } 3052 if (nullsp) { 3053 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3054 } 3055 } 3056 3057 /* Local solution on R nodes */ 3058 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3059 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3060 } 3061 /* communications from coarse sol to local primal nodes */ 3062 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3063 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3064 3065 /* Sum contributions from the two levels */ 3066 if (!pcbddc->benign_apply_coarse_only) { 3067 if (applytranspose) { 3068 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3069 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3070 } else { 3071 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3072 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3073 } 3074 /* store p0 */ 3075 if (pcbddc->benign_n) { 3076 PetscScalar *array; 3077 PetscInt j; 3078 3079 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3080 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3081 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3082 } 3083 } else { /* expand the coarse solution */ 3084 if (applytranspose) { 3085 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3086 } else { 3087 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3088 } 3089 } 3090 PetscFunctionReturn(0); 3091 } 3092 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3095 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3096 { 3097 PetscErrorCode ierr; 3098 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3099 PetscScalar *array; 3100 Vec from,to; 3101 3102 PetscFunctionBegin; 3103 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3104 from = pcbddc->coarse_vec; 3105 to = pcbddc->vec1_P; 3106 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3107 Vec tvec; 3108 3109 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3110 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3111 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3112 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3113 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3114 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3115 } 3116 } else { /* from local to global -> put data in coarse right hand side */ 3117 from = pcbddc->vec1_P; 3118 to = pcbddc->coarse_vec; 3119 } 3120 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3126 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3127 { 3128 PetscErrorCode ierr; 3129 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3130 PetscScalar *array; 3131 Vec from,to; 3132 3133 PetscFunctionBegin; 3134 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3135 from = pcbddc->coarse_vec; 3136 to = pcbddc->vec1_P; 3137 } else { /* from local to global -> put data in coarse right hand side */ 3138 from = pcbddc->vec1_P; 3139 to = pcbddc->coarse_vec; 3140 } 3141 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3142 if (smode == SCATTER_FORWARD) { 3143 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3144 Vec tvec; 3145 3146 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3147 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3148 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3149 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3150 } 3151 } else { 3152 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3153 ierr = VecResetArray(from);CHKERRQ(ierr); 3154 } 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 3159 /* uncomment for testing purposes */ 3160 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3163 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3164 { 3165 PetscErrorCode ierr; 3166 PC_IS* pcis = (PC_IS*)(pc->data); 3167 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3168 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3169 /* one and zero */ 3170 PetscScalar one=1.0,zero=0.0; 3171 /* space to store constraints and their local indices */ 3172 PetscScalar *constraints_data; 3173 PetscInt *constraints_idxs,*constraints_idxs_B; 3174 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3175 PetscInt *constraints_n; 3176 /* iterators */ 3177 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3178 /* BLAS integers */ 3179 PetscBLASInt lwork,lierr; 3180 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3181 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3182 /* reuse */ 3183 PetscInt olocal_primal_size,olocal_primal_size_cc; 3184 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3185 /* change of basis */ 3186 PetscBool qr_needed; 3187 PetscBT change_basis,qr_needed_idx; 3188 /* auxiliary stuff */ 3189 PetscInt *nnz,*is_indices; 3190 PetscInt ncc; 3191 /* some quantities */ 3192 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3193 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3194 3195 PetscFunctionBegin; 3196 /* Destroy Mat objects computed previously */ 3197 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3198 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3199 /* save info on constraints from previous setup (if any) */ 3200 olocal_primal_size = pcbddc->local_primal_size; 3201 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3202 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3203 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3204 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3205 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3206 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3207 3208 /* print some info */ 3209 if (pcbddc->dbg_flag) { 3210 IS vertices; 3211 PetscInt nv,nedges,nfaces; 3212 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3213 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3214 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3215 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3221 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3222 } 3223 3224 if (!pcbddc->adaptive_selection) { 3225 IS ISForVertices,*ISForFaces,*ISForEdges; 3226 MatNullSpace nearnullsp; 3227 const Vec *nearnullvecs; 3228 Vec *localnearnullsp; 3229 PetscScalar *array; 3230 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3231 PetscBool nnsp_has_cnst; 3232 /* LAPACK working arrays for SVD or POD */ 3233 PetscBool skip_lapack,boolforchange; 3234 PetscScalar *work; 3235 PetscReal *singular_vals; 3236 #if defined(PETSC_USE_COMPLEX) 3237 PetscReal *rwork; 3238 #endif 3239 #if defined(PETSC_MISSING_LAPACK_GESVD) 3240 PetscScalar *temp_basis,*correlation_mat; 3241 #else 3242 PetscBLASInt dummy_int=1; 3243 PetscScalar dummy_scalar=1.; 3244 #endif 3245 3246 /* Get index sets for faces, edges and vertices from graph */ 3247 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3248 /* free unneeded index sets */ 3249 if (!pcbddc->use_vertices) { 3250 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3251 } 3252 if (!pcbddc->use_edges) { 3253 for (i=0;i<n_ISForEdges;i++) { 3254 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3255 } 3256 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3257 n_ISForEdges = 0; 3258 } 3259 if (!pcbddc->use_faces) { 3260 for (i=0;i<n_ISForFaces;i++) { 3261 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3262 } 3263 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3264 n_ISForFaces = 0; 3265 } 3266 3267 #if defined(PETSC_USE_DEBUG) 3268 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3269 Also use_change_of_basis should be consistent among processors */ 3270 if (pcbddc->NullSpace) { 3271 PetscBool tbool[2],gbool[2]; 3272 3273 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3274 pcbddc->use_change_of_basis = PETSC_TRUE; 3275 if (!ISForEdges) { 3276 pcbddc->use_change_on_faces = PETSC_TRUE; 3277 } 3278 } 3279 tbool[0] = pcbddc->use_change_of_basis; 3280 tbool[1] = pcbddc->use_change_on_faces; 3281 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3282 pcbddc->use_change_of_basis = gbool[0]; 3283 pcbddc->use_change_on_faces = gbool[1]; 3284 } 3285 #endif 3286 3287 /* check if near null space is attached to global mat */ 3288 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3289 if (nearnullsp) { 3290 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3291 /* remove any stored info */ 3292 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3293 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3294 /* store information for BDDC solver reuse */ 3295 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3296 pcbddc->onearnullspace = nearnullsp; 3297 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3298 for (i=0;i<nnsp_size;i++) { 3299 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3300 } 3301 } else { /* if near null space is not provided BDDC uses constants by default */ 3302 nnsp_size = 0; 3303 nnsp_has_cnst = PETSC_TRUE; 3304 } 3305 /* get max number of constraints on a single cc */ 3306 max_constraints = nnsp_size; 3307 if (nnsp_has_cnst) max_constraints++; 3308 3309 /* 3310 Evaluate maximum storage size needed by the procedure 3311 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3312 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3313 There can be multiple constraints per connected component 3314 */ 3315 n_vertices = 0; 3316 if (ISForVertices) { 3317 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3318 } 3319 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3320 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3321 3322 total_counts = n_ISForFaces+n_ISForEdges; 3323 total_counts *= max_constraints; 3324 total_counts += n_vertices; 3325 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3326 3327 total_counts = 0; 3328 max_size_of_constraint = 0; 3329 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3330 IS used_is; 3331 if (i<n_ISForEdges) { 3332 used_is = ISForEdges[i]; 3333 } else { 3334 used_is = ISForFaces[i-n_ISForEdges]; 3335 } 3336 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3337 total_counts += j; 3338 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3339 } 3340 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3341 3342 /* get local part of global near null space vectors */ 3343 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3344 for (k=0;k<nnsp_size;k++) { 3345 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3346 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3347 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3348 } 3349 3350 /* whether or not to skip lapack calls */ 3351 skip_lapack = PETSC_TRUE; 3352 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3353 3354 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3355 if (!skip_lapack) { 3356 PetscScalar temp_work; 3357 3358 #if defined(PETSC_MISSING_LAPACK_GESVD) 3359 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3360 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3361 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3362 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3363 #if defined(PETSC_USE_COMPLEX) 3364 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3365 #endif 3366 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3367 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3368 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3369 lwork = -1; 3370 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3371 #if !defined(PETSC_USE_COMPLEX) 3372 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3373 #else 3374 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3375 #endif 3376 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3377 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3378 #else /* on missing GESVD */ 3379 /* SVD */ 3380 PetscInt max_n,min_n; 3381 max_n = max_size_of_constraint; 3382 min_n = max_constraints; 3383 if (max_size_of_constraint < max_constraints) { 3384 min_n = max_size_of_constraint; 3385 max_n = max_constraints; 3386 } 3387 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3388 #if defined(PETSC_USE_COMPLEX) 3389 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3390 #endif 3391 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3392 lwork = -1; 3393 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3394 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3395 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3397 #if !defined(PETSC_USE_COMPLEX) 3398 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3399 #else 3400 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3401 #endif 3402 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3403 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3404 #endif /* on missing GESVD */ 3405 /* Allocate optimal workspace */ 3406 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3407 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3408 } 3409 /* Now we can loop on constraining sets */ 3410 total_counts = 0; 3411 constraints_idxs_ptr[0] = 0; 3412 constraints_data_ptr[0] = 0; 3413 /* vertices */ 3414 if (n_vertices) { 3415 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3416 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3417 for (i=0;i<n_vertices;i++) { 3418 constraints_n[total_counts] = 1; 3419 constraints_data[total_counts] = 1.0; 3420 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3421 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3422 total_counts++; 3423 } 3424 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3425 n_vertices = total_counts; 3426 } 3427 3428 /* edges and faces */ 3429 total_counts_cc = total_counts; 3430 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3431 IS used_is; 3432 PetscBool idxs_copied = PETSC_FALSE; 3433 3434 if (ncc<n_ISForEdges) { 3435 used_is = ISForEdges[ncc]; 3436 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3437 } else { 3438 used_is = ISForFaces[ncc-n_ISForEdges]; 3439 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3440 } 3441 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3442 3443 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3444 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3445 /* change of basis should not be performed on local periodic nodes */ 3446 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3447 if (nnsp_has_cnst) { 3448 PetscScalar quad_value; 3449 3450 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3451 idxs_copied = PETSC_TRUE; 3452 3453 if (!pcbddc->use_nnsp_true) { 3454 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3455 } else { 3456 quad_value = 1.0; 3457 } 3458 for (j=0;j<size_of_constraint;j++) { 3459 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3460 } 3461 temp_constraints++; 3462 total_counts++; 3463 } 3464 for (k=0;k<nnsp_size;k++) { 3465 PetscReal real_value; 3466 PetscScalar *ptr_to_data; 3467 3468 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3469 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3470 for (j=0;j<size_of_constraint;j++) { 3471 ptr_to_data[j] = array[is_indices[j]]; 3472 } 3473 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3474 /* check if array is null on the connected component */ 3475 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3476 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3477 if (real_value > 0.0) { /* keep indices and values */ 3478 temp_constraints++; 3479 total_counts++; 3480 if (!idxs_copied) { 3481 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3482 idxs_copied = PETSC_TRUE; 3483 } 3484 } 3485 } 3486 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3487 valid_constraints = temp_constraints; 3488 if (!pcbddc->use_nnsp_true && temp_constraints) { 3489 if (temp_constraints == 1) { /* just normalize the constraint */ 3490 PetscScalar norm,*ptr_to_data; 3491 3492 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3494 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3495 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3496 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3497 } else { /* perform SVD */ 3498 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3499 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3500 3501 #if defined(PETSC_MISSING_LAPACK_GESVD) 3502 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3503 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3504 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3505 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3506 from that computed using LAPACKgesvd 3507 -> This is due to a different computation of eigenvectors in LAPACKheev 3508 -> The quality of the POD-computed basis will be the same */ 3509 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3510 /* Store upper triangular part of correlation matrix */ 3511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3512 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3513 for (j=0;j<temp_constraints;j++) { 3514 for (k=0;k<j+1;k++) { 3515 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3516 } 3517 } 3518 /* compute eigenvalues and eigenvectors of correlation matrix */ 3519 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3520 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3521 #if !defined(PETSC_USE_COMPLEX) 3522 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3523 #else 3524 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3525 #endif 3526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3527 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3528 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3529 j = 0; 3530 while (j < temp_constraints && singular_vals[j] < tol) j++; 3531 total_counts = total_counts-j; 3532 valid_constraints = temp_constraints-j; 3533 /* scale and copy POD basis into used quadrature memory */ 3534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3535 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3536 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3538 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3540 if (j<temp_constraints) { 3541 PetscInt ii; 3542 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3544 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3546 for (k=0;k<temp_constraints-j;k++) { 3547 for (ii=0;ii<size_of_constraint;ii++) { 3548 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3549 } 3550 } 3551 } 3552 #else /* on missing GESVD */ 3553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3554 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3556 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3557 #if !defined(PETSC_USE_COMPLEX) 3558 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3559 #else 3560 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3561 #endif 3562 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3563 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3564 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3565 k = temp_constraints; 3566 if (k > size_of_constraint) k = size_of_constraint; 3567 j = 0; 3568 while (j < k && singular_vals[k-j-1] < tol) j++; 3569 valid_constraints = k-j; 3570 total_counts = total_counts-temp_constraints+valid_constraints; 3571 #endif /* on missing GESVD */ 3572 } 3573 } 3574 /* update pointers information */ 3575 if (valid_constraints) { 3576 constraints_n[total_counts_cc] = valid_constraints; 3577 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3578 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3579 /* set change_of_basis flag */ 3580 if (boolforchange) { 3581 PetscBTSet(change_basis,total_counts_cc); 3582 } 3583 total_counts_cc++; 3584 } 3585 } 3586 /* free workspace */ 3587 if (!skip_lapack) { 3588 ierr = PetscFree(work);CHKERRQ(ierr); 3589 #if defined(PETSC_USE_COMPLEX) 3590 ierr = PetscFree(rwork);CHKERRQ(ierr); 3591 #endif 3592 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3593 #if defined(PETSC_MISSING_LAPACK_GESVD) 3594 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3595 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3596 #endif 3597 } 3598 for (k=0;k<nnsp_size;k++) { 3599 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3600 } 3601 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3602 /* free index sets of faces, edges and vertices */ 3603 for (i=0;i<n_ISForFaces;i++) { 3604 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3605 } 3606 if (n_ISForFaces) { 3607 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3608 } 3609 for (i=0;i<n_ISForEdges;i++) { 3610 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3611 } 3612 if (n_ISForEdges) { 3613 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3614 } 3615 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3616 } else { 3617 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3618 3619 total_counts = 0; 3620 n_vertices = 0; 3621 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3622 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3623 } 3624 max_constraints = 0; 3625 total_counts_cc = 0; 3626 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3627 total_counts += pcbddc->adaptive_constraints_n[i]; 3628 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3629 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3630 } 3631 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3632 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3633 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3634 constraints_data = pcbddc->adaptive_constraints_data; 3635 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3636 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3637 total_counts_cc = 0; 3638 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3639 if (pcbddc->adaptive_constraints_n[i]) { 3640 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3641 } 3642 } 3643 #if 0 3644 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3645 for (i=0;i<total_counts_cc;i++) { 3646 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3647 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3648 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3649 printf(" %d",constraints_idxs[j]); 3650 } 3651 printf("\n"); 3652 printf("number of cc: %d\n",constraints_n[i]); 3653 } 3654 for (i=0;i<n_vertices;i++) { 3655 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3656 } 3657 for (i=0;i<sub_schurs->n_subs;i++) { 3658 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3659 } 3660 #endif 3661 3662 max_size_of_constraint = 0; 3663 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3664 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3665 /* Change of basis */ 3666 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3667 if (pcbddc->use_change_of_basis) { 3668 for (i=0;i<sub_schurs->n_subs;i++) { 3669 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3670 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3671 } 3672 } 3673 } 3674 } 3675 pcbddc->local_primal_size = total_counts; 3676 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3677 3678 /* map constraints_idxs in boundary numbering */ 3679 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3680 if (i != constraints_idxs_ptr[total_counts_cc]) { 3681 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3682 } 3683 3684 /* Create constraint matrix */ 3685 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3686 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3687 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3688 3689 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3690 /* determine if a QR strategy is needed for change of basis */ 3691 qr_needed = PETSC_FALSE; 3692 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3693 total_primal_vertices=0; 3694 pcbddc->local_primal_size_cc = 0; 3695 for (i=0;i<total_counts_cc;i++) { 3696 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3697 if (size_of_constraint == 1) { 3698 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3699 pcbddc->local_primal_size_cc += 1; 3700 } else if (PetscBTLookup(change_basis,i)) { 3701 for (k=0;k<constraints_n[i];k++) { 3702 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3703 } 3704 pcbddc->local_primal_size_cc += constraints_n[i]; 3705 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3706 PetscBTSet(qr_needed_idx,i); 3707 qr_needed = PETSC_TRUE; 3708 } 3709 } else { 3710 pcbddc->local_primal_size_cc += 1; 3711 } 3712 } 3713 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3714 pcbddc->n_vertices = total_primal_vertices; 3715 /* permute indices in order to have a sorted set of vertices */ 3716 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3717 3718 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3719 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3720 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3721 3722 /* nonzero structure of constraint matrix */ 3723 /* and get reference dof for local constraints */ 3724 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3725 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3726 3727 j = total_primal_vertices; 3728 total_counts = total_primal_vertices; 3729 cum = total_primal_vertices; 3730 for (i=n_vertices;i<total_counts_cc;i++) { 3731 if (!PetscBTLookup(change_basis,i)) { 3732 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3733 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3734 cum++; 3735 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3736 for (k=0;k<constraints_n[i];k++) { 3737 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3738 nnz[j+k] = size_of_constraint; 3739 } 3740 j += constraints_n[i]; 3741 } 3742 } 3743 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3744 ierr = PetscFree(nnz);CHKERRQ(ierr); 3745 3746 /* set values in constraint matrix */ 3747 for (i=0;i<total_primal_vertices;i++) { 3748 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3749 } 3750 total_counts = total_primal_vertices; 3751 for (i=n_vertices;i<total_counts_cc;i++) { 3752 if (!PetscBTLookup(change_basis,i)) { 3753 PetscInt *cols; 3754 3755 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3756 cols = constraints_idxs+constraints_idxs_ptr[i]; 3757 for (k=0;k<constraints_n[i];k++) { 3758 PetscInt row = total_counts+k; 3759 PetscScalar *vals; 3760 3761 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3762 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3763 } 3764 total_counts += constraints_n[i]; 3765 } 3766 } 3767 /* assembling */ 3768 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3769 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3770 3771 /* 3772 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3773 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3774 */ 3775 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3776 if (pcbddc->use_change_of_basis) { 3777 /* dual and primal dofs on a single cc */ 3778 PetscInt dual_dofs,primal_dofs; 3779 /* working stuff for GEQRF */ 3780 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3781 PetscBLASInt lqr_work; 3782 /* working stuff for UNGQR */ 3783 PetscScalar *gqr_work,lgqr_work_t; 3784 PetscBLASInt lgqr_work; 3785 /* working stuff for TRTRS */ 3786 PetscScalar *trs_rhs; 3787 PetscBLASInt Blas_NRHS; 3788 /* pointers for values insertion into change of basis matrix */ 3789 PetscInt *start_rows,*start_cols; 3790 PetscScalar *start_vals; 3791 /* working stuff for values insertion */ 3792 PetscBT is_primal; 3793 PetscInt *aux_primal_numbering_B; 3794 /* matrix sizes */ 3795 PetscInt global_size,local_size; 3796 /* temporary change of basis */ 3797 Mat localChangeOfBasisMatrix; 3798 /* extra space for debugging */ 3799 PetscScalar *dbg_work; 3800 3801 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3802 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3803 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3804 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3805 /* nonzeros for local mat */ 3806 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3807 for (i=0;i<pcis->n;i++) nnz[i]=1; 3808 for (i=n_vertices;i<total_counts_cc;i++) { 3809 if (PetscBTLookup(change_basis,i)) { 3810 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3811 if (PetscBTLookup(qr_needed_idx,i)) { 3812 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3813 } else { 3814 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3815 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3816 } 3817 } 3818 } 3819 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3820 ierr = PetscFree(nnz);CHKERRQ(ierr); 3821 /* Set initial identity in the matrix */ 3822 for (i=0;i<pcis->n;i++) { 3823 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3824 } 3825 3826 if (pcbddc->dbg_flag) { 3827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3828 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3829 } 3830 3831 3832 /* Now we loop on the constraints which need a change of basis */ 3833 /* 3834 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3835 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3836 3837 Basic blocks of change of basis matrix T computed by 3838 3839 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3840 3841 | 1 0 ... 0 s_1/S | 3842 | 0 1 ... 0 s_2/S | 3843 | ... | 3844 | 0 ... 1 s_{n-1}/S | 3845 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3846 3847 with S = \sum_{i=1}^n s_i^2 3848 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3849 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3850 3851 - QR decomposition of constraints otherwise 3852 */ 3853 if (qr_needed) { 3854 /* space to store Q */ 3855 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3856 /* first we issue queries for optimal work */ 3857 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3858 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3859 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3860 lqr_work = -1; 3861 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3862 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3863 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3864 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3865 lgqr_work = -1; 3866 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3867 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3868 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3869 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3870 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3871 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3872 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3873 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3874 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3875 /* array to store scaling factors for reflectors */ 3876 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3877 /* array to store rhs and solution of triangular solver */ 3878 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3879 /* allocating workspace for check */ 3880 if (pcbddc->dbg_flag) { 3881 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3882 } 3883 } 3884 /* array to store whether a node is primal or not */ 3885 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3886 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3887 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3888 if (i != total_primal_vertices) { 3889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3890 } 3891 for (i=0;i<total_primal_vertices;i++) { 3892 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3893 } 3894 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3895 3896 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3897 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3898 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3899 if (PetscBTLookup(change_basis,total_counts)) { 3900 /* get constraint info */ 3901 primal_dofs = constraints_n[total_counts]; 3902 dual_dofs = size_of_constraint-primal_dofs; 3903 3904 if (pcbddc->dbg_flag) { 3905 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3906 } 3907 3908 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3909 3910 /* copy quadrature constraints for change of basis check */ 3911 if (pcbddc->dbg_flag) { 3912 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3913 } 3914 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3915 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3916 3917 /* compute QR decomposition of constraints */ 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3919 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3921 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3922 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3923 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3924 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3925 3926 /* explictly compute R^-T */ 3927 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3928 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3929 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3930 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3931 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3932 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3933 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3934 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3935 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3937 3938 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3939 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3941 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3943 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3944 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3945 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3946 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3947 3948 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3949 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3950 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3951 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3952 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3953 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3955 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3957 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3958 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3959 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3960 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3961 3962 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3963 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3964 /* insert cols for primal dofs */ 3965 for (j=0;j<primal_dofs;j++) { 3966 start_vals = &qr_basis[j*size_of_constraint]; 3967 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3968 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3969 } 3970 /* insert cols for dual dofs */ 3971 for (j=0,k=0;j<dual_dofs;k++) { 3972 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3973 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3974 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3975 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3976 j++; 3977 } 3978 } 3979 3980 /* check change of basis */ 3981 if (pcbddc->dbg_flag) { 3982 PetscInt ii,jj; 3983 PetscBool valid_qr=PETSC_TRUE; 3984 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3989 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3990 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3991 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3992 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3993 for (jj=0;jj<size_of_constraint;jj++) { 3994 for (ii=0;ii<primal_dofs;ii++) { 3995 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3996 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3997 } 3998 } 3999 if (!valid_qr) { 4000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4001 for (jj=0;jj<size_of_constraint;jj++) { 4002 for (ii=0;ii<primal_dofs;ii++) { 4003 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4004 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4005 } 4006 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4007 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4008 } 4009 } 4010 } 4011 } else { 4012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4013 } 4014 } 4015 } else { /* simple transformation block */ 4016 PetscInt row,col; 4017 PetscScalar val,norm; 4018 4019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4020 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4021 for (j=0;j<size_of_constraint;j++) { 4022 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4023 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4024 if (!PetscBTLookup(is_primal,row_B)) { 4025 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4026 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4027 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4028 } else { 4029 for (k=0;k<size_of_constraint;k++) { 4030 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4031 if (row != col) { 4032 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4033 } else { 4034 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4035 } 4036 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4037 } 4038 } 4039 } 4040 if (pcbddc->dbg_flag) { 4041 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4042 } 4043 } 4044 } else { 4045 if (pcbddc->dbg_flag) { 4046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4047 } 4048 } 4049 } 4050 4051 /* free workspace */ 4052 if (qr_needed) { 4053 if (pcbddc->dbg_flag) { 4054 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4055 } 4056 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4057 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4058 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4059 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4060 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4061 } 4062 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4063 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4064 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4065 4066 /* assembling of global change of variable */ 4067 { 4068 Mat tmat; 4069 PetscInt bs; 4070 4071 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4072 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4073 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4074 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4075 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4076 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4077 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4078 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4079 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4080 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4081 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4082 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4083 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4084 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4085 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4086 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4087 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4088 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4089 } 4090 /* check */ 4091 if (pcbddc->dbg_flag) { 4092 PetscReal error; 4093 Vec x,x_change; 4094 4095 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4096 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4097 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4098 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4099 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4100 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4101 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4102 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4103 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4104 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4105 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4106 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4109 ierr = VecDestroy(&x);CHKERRQ(ierr); 4110 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4111 } 4112 4113 /* adapt sub_schurs computed (if any) */ 4114 if (pcbddc->use_deluxe_scaling) { 4115 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4116 if (sub_schurs->S_Ej_all) { 4117 Mat S_new,tmat; 4118 ISLocalToGlobalMapping NtoSall; 4119 IS is_all_N,is_V,is_V_Sall; 4120 const PetscScalar *array; 4121 const PetscInt *idxs_V,*idxs_all; 4122 PetscInt i,n_V; 4123 4124 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4125 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4126 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4127 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4128 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4129 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4130 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4131 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4132 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4133 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4134 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4135 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4136 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4137 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4138 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4139 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4140 for (i=0;i<n_V;i++) { 4141 PetscScalar val; 4142 PetscInt idx; 4143 4144 idx = idxs_V[i]; 4145 val = array[idxs_all[idxs_V[i]]]; 4146 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4147 } 4148 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 sub_schurs->S_Ej_all = S_new; 4151 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4152 if (sub_schurs->sum_S_Ej_all) { 4153 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4154 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4155 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4156 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4157 sub_schurs->sum_S_Ej_all = S_new; 4158 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4159 } 4160 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4161 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4162 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4163 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4164 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4165 } 4166 } 4167 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4168 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4169 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4170 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4171 } 4172 4173 /* set up change of basis context */ 4174 if (pcbddc->ChangeOfBasisMatrix) { 4175 PCBDDCChange_ctx change_ctx; 4176 4177 if (!pcbddc->new_global_mat) { 4178 PetscInt global_size,local_size; 4179 4180 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4181 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4182 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4183 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4184 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4185 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4186 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4187 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4188 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4189 } else { 4190 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4191 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4192 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4193 } 4194 if (!pcbddc->user_ChangeOfBasisMatrix) { 4195 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4196 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4197 } else { 4198 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4199 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4200 } 4201 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4202 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4203 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4205 } else { 4206 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4207 } 4208 4209 /* add pressure dofs to set of primal nodes for numbering purposes */ 4210 for (i=0;i<pcbddc->benign_n;i++) { 4211 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4212 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4213 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4214 pcbddc->local_primal_size_cc++; 4215 pcbddc->local_primal_size++; 4216 } 4217 4218 /* check if a new primal space has been introduced (also take into account benign trick) */ 4219 pcbddc->new_primal_space_local = PETSC_TRUE; 4220 if (olocal_primal_size == pcbddc->local_primal_size) { 4221 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4222 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4223 if (!pcbddc->new_primal_space_local) { 4224 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4225 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4226 } 4227 } 4228 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4229 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4230 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4231 4232 /* flush dbg viewer */ 4233 if (pcbddc->dbg_flag) { 4234 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4235 } 4236 4237 /* free workspace */ 4238 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4239 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4240 if (!pcbddc->adaptive_selection) { 4241 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4242 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4243 } else { 4244 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4245 pcbddc->adaptive_constraints_idxs_ptr, 4246 pcbddc->adaptive_constraints_data_ptr, 4247 pcbddc->adaptive_constraints_idxs, 4248 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4249 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4250 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4251 } 4252 PetscFunctionReturn(0); 4253 } 4254 4255 #undef __FUNCT__ 4256 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4257 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4258 { 4259 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4260 PC_IS *pcis = (PC_IS*)pc->data; 4261 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4262 PetscInt ierr,i,vertex_size,N; 4263 PetscViewer viewer=pcbddc->dbg_viewer; 4264 4265 PetscFunctionBegin; 4266 /* Reset previously computed graph */ 4267 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4268 /* Init local Graph struct */ 4269 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4270 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4271 4272 /* Check validity of the csr graph passed in by the user */ 4273 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4274 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4275 } 4276 4277 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4278 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4279 PetscInt *xadj,*adjncy; 4280 PetscInt nvtxs; 4281 PetscBool flg_row=PETSC_FALSE; 4282 4283 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4284 if (flg_row) { 4285 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4286 pcbddc->computed_rowadj = PETSC_TRUE; 4287 } 4288 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4289 } 4290 if (pcbddc->dbg_flag) { 4291 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4292 } 4293 4294 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4295 vertex_size = 1; 4296 if (pcbddc->user_provided_isfordofs) { 4297 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4298 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4299 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4300 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4301 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4302 } 4303 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4304 pcbddc->n_ISForDofs = 0; 4305 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4306 } 4307 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4308 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4309 } else { 4310 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4311 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4312 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4313 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4314 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4315 } 4316 } 4317 } 4318 4319 /* Setup of Graph */ 4320 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4321 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4322 } 4323 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4324 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4325 } 4326 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4327 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4328 } 4329 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4330 4331 /* attach info on disconnected subdomains if present */ 4332 if (pcbddc->n_local_subs) { 4333 PetscInt *local_subs; 4334 4335 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4336 for (i=0;i<pcbddc->n_local_subs;i++) { 4337 const PetscInt *idxs; 4338 PetscInt nl,j; 4339 4340 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4341 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4342 for (j=0;j<nl;j++) { 4343 local_subs[idxs[j]] = i; 4344 } 4345 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4346 } 4347 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4348 pcbddc->mat_graph->local_subs = local_subs; 4349 } 4350 4351 /* Graph's connected components analysis */ 4352 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4353 4354 /* print some info to stdout */ 4355 if (pcbddc->dbg_flag) { 4356 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4357 } 4358 4359 /* mark topography has done */ 4360 pcbddc->recompute_topography = PETSC_FALSE; 4361 PetscFunctionReturn(0); 4362 } 4363 4364 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4365 #undef __FUNCT__ 4366 #define __FUNCT__ "PCBDDCSubsetNumbering" 4367 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4368 { 4369 PetscSF sf; 4370 PetscLayout map; 4371 const PetscInt *idxs; 4372 PetscInt *leaf_data,*root_data,*gidxs; 4373 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4374 PetscInt n_n,nlocals,start,first_index; 4375 PetscMPIInt commsize; 4376 PetscBool first_found; 4377 PetscErrorCode ierr; 4378 4379 PetscFunctionBegin; 4380 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4381 if (subset_mult) { 4382 PetscCheckSameComm(subset,1,subset_mult,2); 4383 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4384 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4385 } 4386 /* create workspace layout for computing global indices of subset */ 4387 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4388 lbounds[0] = lbounds[1] = 0; 4389 for (i=0;i<n;i++) { 4390 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4391 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4392 } 4393 lbounds[0] = -lbounds[0]; 4394 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4395 gbounds[0] = -gbounds[0]; 4396 N = gbounds[1] - gbounds[0] + 1; 4397 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4398 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4399 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4400 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4401 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4402 4403 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4404 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4405 if (subset_mult) { 4406 const PetscInt* idxs_mult; 4407 4408 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4409 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4410 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4411 } else { 4412 for (i=0;i<n;i++) leaf_data[i] = 1; 4413 } 4414 /* local size of new subset */ 4415 n_n = 0; 4416 for (i=0;i<n;i++) n_n += leaf_data[i]; 4417 4418 /* global indexes in layout */ 4419 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4420 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4421 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4422 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4423 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4424 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4425 4426 /* reduce from leaves to roots */ 4427 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4428 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4429 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4430 4431 /* count indexes in local part of layout */ 4432 nlocals = 0; 4433 first_index = -1; 4434 first_found = PETSC_FALSE; 4435 for (i=0;i<Nl;i++) { 4436 if (!first_found && root_data[i]) { 4437 first_found = PETSC_TRUE; 4438 first_index = i; 4439 } 4440 nlocals += root_data[i]; 4441 } 4442 4443 /* cumulative of number of indexes and size of subset without holes */ 4444 #if defined(PETSC_HAVE_MPI_EXSCAN) 4445 start = 0; 4446 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4447 #else 4448 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4449 start = start-nlocals; 4450 #endif 4451 4452 if (N_n) { /* compute total size of new subset if requested */ 4453 *N_n = start + nlocals; 4454 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4455 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4456 } 4457 4458 /* adapt root data with cumulative */ 4459 if (first_found) { 4460 PetscInt old_index; 4461 4462 root_data[first_index] += start; 4463 old_index = first_index; 4464 for (i=first_index+1;i<Nl;i++) { 4465 if (root_data[i]) { 4466 root_data[i] += root_data[old_index]; 4467 old_index = i; 4468 } 4469 } 4470 } 4471 4472 /* from roots to leaves */ 4473 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4474 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4475 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4476 4477 /* create new IS with global indexes without holes */ 4478 if (subset_mult) { 4479 const PetscInt* idxs_mult; 4480 PetscInt cum; 4481 4482 cum = 0; 4483 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4484 for (i=0;i<n;i++) { 4485 PetscInt j; 4486 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4487 } 4488 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4489 } else { 4490 for (i=0;i<n;i++) { 4491 gidxs[i] = leaf_data[i]-1; 4492 } 4493 } 4494 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4495 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4496 PetscFunctionReturn(0); 4497 } 4498 4499 #undef __FUNCT__ 4500 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4501 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4502 { 4503 PetscInt i,j; 4504 PetscScalar *alphas; 4505 PetscErrorCode ierr; 4506 4507 PetscFunctionBegin; 4508 /* this implements stabilized Gram-Schmidt */ 4509 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4510 for (i=0;i<n;i++) { 4511 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4512 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4513 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4514 } 4515 ierr = PetscFree(alphas);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "MatISGetSubassemblingPattern" 4521 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4522 { 4523 IS ranks_send_to; 4524 PetscInt n_neighs,*neighs,*n_shared,**shared; 4525 PetscMPIInt size,rank,color; 4526 PetscInt *xadj,*adjncy; 4527 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4528 PetscInt i,local_size,threshold=0; 4529 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4530 PetscSubcomm subcomm; 4531 PetscErrorCode ierr; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4535 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4536 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4537 4538 /* Get info on mapping */ 4539 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4540 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4541 4542 /* build local CSR graph of subdomains' connectivity */ 4543 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4544 xadj[0] = 0; 4545 xadj[1] = PetscMax(n_neighs-1,0); 4546 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4547 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4548 4549 if (threshold) { 4550 PetscInt xadj_count = 0; 4551 for (i=1;i<n_neighs;i++) { 4552 if (n_shared[i] > threshold) { 4553 adjncy[xadj_count] = neighs[i]; 4554 adjncy_wgt[xadj_count] = n_shared[i]; 4555 xadj_count++; 4556 } 4557 } 4558 xadj[1] = xadj_count; 4559 } else { 4560 if (xadj[1]) { 4561 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4562 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4563 } 4564 } 4565 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4566 if (use_square) { 4567 for (i=0;i<xadj[1];i++) { 4568 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4569 } 4570 } 4571 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4572 4573 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4574 4575 /* 4576 Restrict work on active processes only. 4577 */ 4578 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4579 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4581 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4582 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4583 if (color) { 4584 ierr = PetscFree(xadj);CHKERRQ(ierr); 4585 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4586 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4587 } else { 4588 Mat subdomain_adj; 4589 IS new_ranks,new_ranks_contig; 4590 MatPartitioning partitioner; 4591 PetscInt prank,rstart=0,rend=0; 4592 PetscInt *is_indices,*oldranks; 4593 PetscBool aggregate; 4594 4595 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4596 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4597 prank = rank; 4598 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4599 /* 4600 for (i=0;i<size;i++) { 4601 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4602 } 4603 */ 4604 for (i=0;i<xadj[1];i++) { 4605 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4606 } 4607 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4608 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4609 if (aggregate) { 4610 PetscInt lrows,row,ncols,*cols; 4611 PetscMPIInt nrank; 4612 PetscScalar *vals; 4613 4614 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4615 lrows = 0; 4616 if (nrank<redprocs) { 4617 lrows = size/redprocs; 4618 if (nrank<size%redprocs) lrows++; 4619 } 4620 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4621 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4622 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4623 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4624 row = nrank; 4625 ncols = xadj[1]-xadj[0]; 4626 cols = adjncy; 4627 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4628 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4629 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4630 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4631 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4632 ierr = PetscFree(xadj);CHKERRQ(ierr); 4633 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4634 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4635 ierr = PetscFree(vals);CHKERRQ(ierr); 4636 } else { 4637 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4638 } 4639 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4640 4641 /* Partition */ 4642 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4643 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4644 if (use_vwgt) { 4645 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4646 v_wgt[0] = local_size; 4647 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4648 } 4649 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4650 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4651 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4652 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4653 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4654 4655 /* renumber new_ranks to avoid "holes" in new set of processors */ 4656 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4657 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4658 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4659 if (!redprocs) { 4660 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4661 } else { 4662 PetscInt idxs[1]; 4663 PetscMPIInt tag; 4664 MPI_Request *reqs; 4665 4666 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4668 for (i=rstart;i<rend;i++) { 4669 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4670 } 4671 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4672 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4673 ierr = PetscFree(reqs);CHKERRQ(ierr); 4674 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4675 } 4676 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4677 /* clean up */ 4678 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4679 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4680 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4681 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4682 } 4683 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4684 4685 /* assemble parallel IS for sends */ 4686 i = 1; 4687 if (color) i=0; 4688 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4689 /* get back IS */ 4690 *is_sends = ranks_send_to; 4691 PetscFunctionReturn(0); 4692 } 4693 4694 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "MatISSubassemble" 4698 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4699 { 4700 Mat local_mat; 4701 IS is_sends_internal; 4702 PetscInt rows,cols,new_local_rows; 4703 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4704 PetscBool ismatis,isdense,newisdense,destroy_mat; 4705 ISLocalToGlobalMapping l2gmap; 4706 PetscInt* l2gmap_indices; 4707 const PetscInt* is_indices; 4708 MatType new_local_type; 4709 /* buffers */ 4710 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4711 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4712 PetscInt *recv_buffer_idxs_local; 4713 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4714 /* MPI */ 4715 MPI_Comm comm,comm_n; 4716 PetscSubcomm subcomm; 4717 PetscMPIInt n_sends,n_recvs,commsize; 4718 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4719 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4720 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4721 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4722 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4723 PetscErrorCode ierr; 4724 4725 PetscFunctionBegin; 4726 /* TODO: add missing checks */ 4727 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4728 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4729 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4730 PetscValidLogicalCollectiveInt(mat,nis,7); 4731 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4732 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4733 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4734 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4735 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4736 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4737 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4738 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4739 PetscInt mrows,mcols,mnrows,mncols; 4740 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4741 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4742 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4743 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4744 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4745 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4746 } 4747 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4748 PetscValidLogicalCollectiveInt(mat,bs,0); 4749 /* prepare IS for sending if not provided */ 4750 if (!is_sends) { 4751 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4752 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4753 } else { 4754 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4755 is_sends_internal = is_sends; 4756 } 4757 4758 /* get comm */ 4759 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4760 4761 /* compute number of sends */ 4762 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4763 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4764 4765 /* compute number of receives */ 4766 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4767 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4768 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4769 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4770 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4771 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4772 ierr = PetscFree(iflags);CHKERRQ(ierr); 4773 4774 /* restrict comm if requested */ 4775 subcomm = 0; 4776 destroy_mat = PETSC_FALSE; 4777 if (restrict_comm) { 4778 PetscMPIInt color,subcommsize; 4779 4780 color = 0; 4781 if (restrict_full) { 4782 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4783 } else { 4784 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4785 } 4786 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4787 subcommsize = commsize - subcommsize; 4788 /* check if reuse has been requested */ 4789 if (reuse == MAT_REUSE_MATRIX) { 4790 if (*mat_n) { 4791 PetscMPIInt subcommsize2; 4792 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4793 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4794 comm_n = PetscObjectComm((PetscObject)*mat_n); 4795 } else { 4796 comm_n = PETSC_COMM_SELF; 4797 } 4798 } else { /* MAT_INITIAL_MATRIX */ 4799 PetscMPIInt rank; 4800 4801 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4802 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4803 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4804 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4805 comm_n = PetscSubcommChild(subcomm); 4806 } 4807 /* flag to destroy *mat_n if not significative */ 4808 if (color) destroy_mat = PETSC_TRUE; 4809 } else { 4810 comm_n = comm; 4811 } 4812 4813 /* prepare send/receive buffers */ 4814 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4815 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4816 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4817 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4818 if (nis) { 4819 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4820 } 4821 4822 /* Get data from local matrices */ 4823 if (!isdense) { 4824 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4825 /* TODO: See below some guidelines on how to prepare the local buffers */ 4826 /* 4827 send_buffer_vals should contain the raw values of the local matrix 4828 send_buffer_idxs should contain: 4829 - MatType_PRIVATE type 4830 - PetscInt size_of_l2gmap 4831 - PetscInt global_row_indices[size_of_l2gmap] 4832 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4833 */ 4834 } else { 4835 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4836 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4837 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4838 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4839 send_buffer_idxs[1] = i; 4840 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4841 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4842 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4843 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4844 for (i=0;i<n_sends;i++) { 4845 ilengths_vals[is_indices[i]] = len*len; 4846 ilengths_idxs[is_indices[i]] = len+2; 4847 } 4848 } 4849 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4850 /* additional is (if any) */ 4851 if (nis) { 4852 PetscMPIInt psum; 4853 PetscInt j; 4854 for (j=0,psum=0;j<nis;j++) { 4855 PetscInt plen; 4856 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4857 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4858 psum += len+1; /* indices + lenght */ 4859 } 4860 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4861 for (j=0,psum=0;j<nis;j++) { 4862 PetscInt plen; 4863 const PetscInt *is_array_idxs; 4864 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4865 send_buffer_idxs_is[psum] = plen; 4866 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4867 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4868 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4869 psum += plen+1; /* indices + lenght */ 4870 } 4871 for (i=0;i<n_sends;i++) { 4872 ilengths_idxs_is[is_indices[i]] = psum; 4873 } 4874 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4875 } 4876 4877 buf_size_idxs = 0; 4878 buf_size_vals = 0; 4879 buf_size_idxs_is = 0; 4880 for (i=0;i<n_recvs;i++) { 4881 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4882 buf_size_vals += (PetscInt)olengths_vals[i]; 4883 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4884 } 4885 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4886 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4887 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4888 4889 /* get new tags for clean communications */ 4890 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4891 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4892 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4893 4894 /* allocate for requests */ 4895 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4896 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4897 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4898 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4899 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4900 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4901 4902 /* communications */ 4903 ptr_idxs = recv_buffer_idxs; 4904 ptr_vals = recv_buffer_vals; 4905 ptr_idxs_is = recv_buffer_idxs_is; 4906 for (i=0;i<n_recvs;i++) { 4907 source_dest = onodes[i]; 4908 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4909 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4910 ptr_idxs += olengths_idxs[i]; 4911 ptr_vals += olengths_vals[i]; 4912 if (nis) { 4913 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4914 ptr_idxs_is += olengths_idxs_is[i]; 4915 } 4916 } 4917 for (i=0;i<n_sends;i++) { 4918 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4919 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4920 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4921 if (nis) { 4922 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4923 } 4924 } 4925 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4926 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4927 4928 /* assemble new l2g map */ 4929 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4930 ptr_idxs = recv_buffer_idxs; 4931 new_local_rows = 0; 4932 for (i=0;i<n_recvs;i++) { 4933 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4934 ptr_idxs += olengths_idxs[i]; 4935 } 4936 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4937 ptr_idxs = recv_buffer_idxs; 4938 new_local_rows = 0; 4939 for (i=0;i<n_recvs;i++) { 4940 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4941 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4942 ptr_idxs += olengths_idxs[i]; 4943 } 4944 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4945 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4946 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4947 4948 /* infer new local matrix type from received local matrices type */ 4949 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4950 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4951 if (n_recvs) { 4952 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4953 ptr_idxs = recv_buffer_idxs; 4954 for (i=0;i<n_recvs;i++) { 4955 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4956 new_local_type_private = MATAIJ_PRIVATE; 4957 break; 4958 } 4959 ptr_idxs += olengths_idxs[i]; 4960 } 4961 switch (new_local_type_private) { 4962 case MATDENSE_PRIVATE: 4963 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4964 new_local_type = MATSEQAIJ; 4965 bs = 1; 4966 } else { /* if I receive only 1 dense matrix */ 4967 new_local_type = MATSEQDENSE; 4968 bs = 1; 4969 } 4970 break; 4971 case MATAIJ_PRIVATE: 4972 new_local_type = MATSEQAIJ; 4973 bs = 1; 4974 break; 4975 case MATBAIJ_PRIVATE: 4976 new_local_type = MATSEQBAIJ; 4977 break; 4978 case MATSBAIJ_PRIVATE: 4979 new_local_type = MATSEQSBAIJ; 4980 break; 4981 default: 4982 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4983 break; 4984 } 4985 } else { /* by default, new_local_type is seqdense */ 4986 new_local_type = MATSEQDENSE; 4987 bs = 1; 4988 } 4989 4990 /* create MATIS object if needed */ 4991 if (reuse == MAT_INITIAL_MATRIX) { 4992 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4993 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4994 } else { 4995 /* it also destroys the local matrices */ 4996 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4997 } 4998 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4999 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5000 5001 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5002 5003 /* Global to local map of received indices */ 5004 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5005 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5006 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5007 5008 /* restore attributes -> type of incoming data and its size */ 5009 buf_size_idxs = 0; 5010 for (i=0;i<n_recvs;i++) { 5011 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5012 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5013 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5014 } 5015 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5016 5017 /* set preallocation */ 5018 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5019 if (!newisdense) { 5020 PetscInt *new_local_nnz=0; 5021 5022 ptr_vals = recv_buffer_vals; 5023 ptr_idxs = recv_buffer_idxs_local; 5024 if (n_recvs) { 5025 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5026 } 5027 for (i=0;i<n_recvs;i++) { 5028 PetscInt j; 5029 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5030 for (j=0;j<*(ptr_idxs+1);j++) { 5031 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5032 } 5033 } else { 5034 /* TODO */ 5035 } 5036 ptr_idxs += olengths_idxs[i]; 5037 } 5038 if (new_local_nnz) { 5039 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5040 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5041 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5042 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5043 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5044 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5045 } else { 5046 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5047 } 5048 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5049 } else { 5050 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5051 } 5052 5053 /* set values */ 5054 ptr_vals = recv_buffer_vals; 5055 ptr_idxs = recv_buffer_idxs_local; 5056 for (i=0;i<n_recvs;i++) { 5057 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5058 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5059 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5060 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5061 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5062 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5063 } else { 5064 /* TODO */ 5065 } 5066 ptr_idxs += olengths_idxs[i]; 5067 ptr_vals += olengths_vals[i]; 5068 } 5069 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5070 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5071 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5072 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5073 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5074 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5075 5076 #if 0 5077 if (!restrict_comm) { /* check */ 5078 Vec lvec,rvec; 5079 PetscReal infty_error; 5080 5081 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5082 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5083 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5084 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5085 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5086 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5087 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5088 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5089 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5090 } 5091 #endif 5092 5093 /* assemble new additional is (if any) */ 5094 if (nis) { 5095 PetscInt **temp_idxs,*count_is,j,psum; 5096 5097 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5098 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5099 ptr_idxs = recv_buffer_idxs_is; 5100 psum = 0; 5101 for (i=0;i<n_recvs;i++) { 5102 for (j=0;j<nis;j++) { 5103 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5104 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5105 psum += plen; 5106 ptr_idxs += plen+1; /* shift pointer to received data */ 5107 } 5108 } 5109 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5110 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5111 for (i=1;i<nis;i++) { 5112 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5113 } 5114 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5115 ptr_idxs = recv_buffer_idxs_is; 5116 for (i=0;i<n_recvs;i++) { 5117 for (j=0;j<nis;j++) { 5118 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5119 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5120 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5121 ptr_idxs += plen+1; /* shift pointer to received data */ 5122 } 5123 } 5124 for (i=0;i<nis;i++) { 5125 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5126 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5127 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5128 } 5129 ierr = PetscFree(count_is);CHKERRQ(ierr); 5130 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5131 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5132 } 5133 /* free workspace */ 5134 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5135 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5136 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5137 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5138 if (isdense) { 5139 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5140 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5141 } else { 5142 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5143 } 5144 if (nis) { 5145 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5146 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5147 } 5148 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5149 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5150 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5151 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5152 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5153 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5154 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5155 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5156 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5157 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5158 ierr = PetscFree(onodes);CHKERRQ(ierr); 5159 if (nis) { 5160 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5161 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5162 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5163 } 5164 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5165 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5166 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5167 for (i=0;i<nis;i++) { 5168 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5169 } 5170 *mat_n = NULL; 5171 } 5172 PetscFunctionReturn(0); 5173 } 5174 5175 /* temporary hack into ksp private data structure */ 5176 #include <petsc/private/kspimpl.h> 5177 5178 #undef __FUNCT__ 5179 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5180 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5181 { 5182 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5183 PC_IS *pcis = (PC_IS*)pc->data; 5184 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5185 MatNullSpace CoarseNullSpace=NULL; 5186 ISLocalToGlobalMapping coarse_islg; 5187 IS coarse_is,*isarray; 5188 PetscInt i,im_active=-1,active_procs=-1; 5189 PetscInt nis,nisdofs,nisneu,nisvert; 5190 PC pc_temp; 5191 PCType coarse_pc_type; 5192 KSPType coarse_ksp_type; 5193 PetscBool multilevel_requested,multilevel_allowed; 5194 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5195 Mat t_coarse_mat_is; 5196 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5197 PetscMPIInt all_procs; 5198 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5199 PetscBool compute_vecs = PETSC_FALSE; 5200 PetscScalar *array; 5201 PetscErrorCode ierr; 5202 5203 PetscFunctionBegin; 5204 /* Assign global numbering to coarse dofs */ 5205 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5206 PetscInt ocoarse_size; 5207 compute_vecs = PETSC_TRUE; 5208 ocoarse_size = pcbddc->coarse_size; 5209 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5210 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5211 /* see if we can avoid some work */ 5212 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5213 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5214 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5215 PC pc; 5216 PetscBool isbddc; 5217 5218 /* temporary workaround since PCBDDC does not have a reset method so far */ 5219 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5220 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5221 if (isbddc) { 5222 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5223 } else { 5224 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5225 } 5226 coarse_reuse = PETSC_FALSE; 5227 } else { /* we can safely reuse already computed coarse matrix */ 5228 coarse_reuse = PETSC_TRUE; 5229 } 5230 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5231 coarse_reuse = PETSC_FALSE; 5232 } 5233 /* reset any subassembling information */ 5234 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5235 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5236 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5237 coarse_reuse = PETSC_TRUE; 5238 } 5239 5240 /* count "active" (i.e. with positive local size) and "void" processes */ 5241 im_active = !!(pcis->n); 5242 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5243 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5244 void_procs = all_procs-active_procs; 5245 csin_type_simple = PETSC_TRUE; 5246 redist = PETSC_FALSE; 5247 if (pcbddc->current_level && void_procs) { 5248 csin_ml = PETSC_TRUE; 5249 ncoarse_ml = void_procs; 5250 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5251 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5252 csin_ds = PETSC_TRUE; 5253 ncoarse_ds = pcbddc->redistribute_coarse; 5254 redist = PETSC_TRUE; 5255 } else { 5256 csin_ds = PETSC_TRUE; 5257 ncoarse_ds = active_procs; 5258 redist = PETSC_TRUE; 5259 } 5260 } else { 5261 csin_ml = PETSC_FALSE; 5262 ncoarse_ml = all_procs; 5263 if (void_procs) { 5264 csin_ds = PETSC_TRUE; 5265 ncoarse_ds = void_procs; 5266 csin_type_simple = PETSC_FALSE; 5267 } else { 5268 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5269 csin_ds = PETSC_TRUE; 5270 ncoarse_ds = pcbddc->redistribute_coarse; 5271 redist = PETSC_TRUE; 5272 } else { 5273 csin_ds = PETSC_FALSE; 5274 ncoarse_ds = all_procs; 5275 } 5276 } 5277 } 5278 5279 /* 5280 test if we can go multilevel: three conditions must be satisfied: 5281 - we have not exceeded the number of levels requested 5282 - we can actually subassemble the active processes 5283 - we can find a suitable number of MPI processes where we can place the subassembled problem 5284 */ 5285 multilevel_allowed = PETSC_FALSE; 5286 multilevel_requested = PETSC_FALSE; 5287 if (pcbddc->current_level < pcbddc->max_levels) { 5288 multilevel_requested = PETSC_TRUE; 5289 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5290 multilevel_allowed = PETSC_FALSE; 5291 } else { 5292 multilevel_allowed = PETSC_TRUE; 5293 } 5294 } 5295 /* determine number of process partecipating to coarse solver */ 5296 if (multilevel_allowed) { 5297 ncoarse = ncoarse_ml; 5298 csin = csin_ml; 5299 redist = PETSC_FALSE; 5300 } else { 5301 ncoarse = ncoarse_ds; 5302 csin = csin_ds; 5303 } 5304 5305 /* creates temporary l2gmap and IS for coarse indexes */ 5306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5307 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5308 5309 /* creates temporary MATIS object for coarse matrix */ 5310 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5311 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5312 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5313 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5314 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5315 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5316 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5317 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5318 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5319 5320 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5321 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5322 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5323 const PetscInt *idxs; 5324 ISLocalToGlobalMapping tmap; 5325 5326 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5327 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5328 /* allocate space for temporary storage */ 5329 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5331 /* allocate for IS array */ 5332 nisdofs = pcbddc->n_ISForDofsLocal; 5333 nisneu = !!pcbddc->NeumannBoundariesLocal; 5334 nisvert = 0; 5335 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5336 nisvert = 1; 5337 } 5338 nis = nisdofs + nisneu + nisvert; 5339 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5340 /* dofs splitting */ 5341 for (i=0;i<nisdofs;i++) { 5342 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5343 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5344 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5345 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5346 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5347 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5348 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5349 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5350 } 5351 /* neumann boundaries */ 5352 if (pcbddc->NeumannBoundariesLocal) { 5353 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5354 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5355 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5356 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5357 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5358 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5359 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5360 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5361 } 5362 /* primal vertices (benign) */ 5363 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5364 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5365 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5366 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5367 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5368 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5369 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5370 } 5371 /* free memory */ 5372 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5373 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5374 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5375 } else { 5376 nis = 0; 5377 nisdofs = 0; 5378 nisneu = 0; 5379 nisvert = 0; 5380 isarray = NULL; 5381 } 5382 /* destroy no longer needed map */ 5383 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5384 5385 /* restrict on coarse candidates (if needed) */ 5386 coarse_mat_is = NULL; 5387 if (csin) { 5388 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5389 if (redist) { 5390 PetscMPIInt rank; 5391 PetscInt spc,n_spc_p1,dest[1],destsize; 5392 5393 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5394 spc = active_procs/ncoarse; 5395 n_spc_p1 = active_procs%ncoarse; 5396 if (im_active) { 5397 destsize = 1; 5398 if (rank > n_spc_p1*(spc+1)-1) { 5399 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5400 } else { 5401 dest[0] = rank/(spc+1); 5402 } 5403 } else { 5404 destsize = 0; 5405 } 5406 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5407 } else if (csin_type_simple) { 5408 PetscMPIInt rank; 5409 PetscInt issize,isidx; 5410 5411 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5412 if (im_active) { 5413 issize = 1; 5414 isidx = (PetscInt)rank; 5415 } else { 5416 issize = 0; 5417 isidx = -1; 5418 } 5419 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5420 } else { /* get a suitable subassembling pattern from MATIS code */ 5421 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5422 } 5423 5424 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5425 if (!redist || ncoarse <= void_procs) { 5426 PetscInt ncoarse_cand,tissize,*nisindices; 5427 PetscInt *coarse_candidates; 5428 const PetscInt* tisindices; 5429 5430 /* get coarse candidates' ranks in pc communicator */ 5431 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5432 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5433 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5434 if (!coarse_candidates[i]) { 5435 coarse_candidates[ncoarse_cand++]=i; 5436 } 5437 } 5438 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5439 5440 5441 if (pcbddc->dbg_flag) { 5442 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5444 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5445 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5446 for (i=0;i<ncoarse_cand;i++) { 5447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5448 } 5449 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5451 } 5452 /* shift the pattern on coarse candidates */ 5453 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5454 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5455 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5456 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5457 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5458 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5459 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5460 } 5461 if (pcbddc->dbg_flag) { 5462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5464 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5465 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5466 } 5467 } 5468 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5469 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5470 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5471 } else { /* this is the last level, so use just receiving processes in subcomm */ 5472 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5473 } 5474 } else { 5475 if (pcbddc->dbg_flag) { 5476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5479 } 5480 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5481 coarse_mat_is = t_coarse_mat_is; 5482 } 5483 5484 /* create local to global scatters for coarse problem */ 5485 if (compute_vecs) { 5486 PetscInt lrows; 5487 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5488 if (coarse_mat_is) { 5489 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5490 } else { 5491 lrows = 0; 5492 } 5493 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5494 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5495 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5496 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5497 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5498 } 5499 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5500 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5501 5502 /* set defaults for coarse KSP and PC */ 5503 if (multilevel_allowed) { 5504 coarse_ksp_type = KSPRICHARDSON; 5505 coarse_pc_type = PCBDDC; 5506 } else { 5507 coarse_ksp_type = KSPPREONLY; 5508 coarse_pc_type = PCREDUNDANT; 5509 } 5510 5511 /* print some info if requested */ 5512 if (pcbddc->dbg_flag) { 5513 if (!multilevel_allowed) { 5514 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5515 if (multilevel_requested) { 5516 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5517 } else if (pcbddc->max_levels) { 5518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5519 } 5520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5521 } 5522 } 5523 5524 /* create the coarse KSP object only once with defaults */ 5525 if (coarse_mat_is) { 5526 MatReuse coarse_mat_reuse; 5527 PetscViewer dbg_viewer = NULL; 5528 if (pcbddc->dbg_flag) { 5529 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5530 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5531 } 5532 if (!pcbddc->coarse_ksp) { 5533 char prefix[256],str_level[16]; 5534 size_t len; 5535 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5536 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5537 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5538 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5539 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5540 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5541 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5542 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5543 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5544 /* prefix */ 5545 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5546 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5547 if (!pcbddc->current_level) { 5548 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5549 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5550 } else { 5551 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5552 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5553 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5554 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5555 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5556 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5557 } 5558 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5559 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5560 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5561 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5562 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5563 /* allow user customization */ 5564 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5565 } 5566 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5567 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5568 if (nisdofs) { 5569 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5570 for (i=0;i<nisdofs;i++) { 5571 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5572 } 5573 } 5574 if (nisneu) { 5575 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5576 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5577 } 5578 if (nisvert) { 5579 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5580 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5581 } 5582 5583 /* get some info after set from options */ 5584 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5585 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5586 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5587 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5588 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5589 isbddc = PETSC_FALSE; 5590 } 5591 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5592 if (isredundant) { 5593 KSP inner_ksp; 5594 PC inner_pc; 5595 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5596 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5597 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5598 } 5599 5600 /* assemble coarse matrix */ 5601 if (coarse_reuse) { 5602 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5603 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5604 coarse_mat_reuse = MAT_REUSE_MATRIX; 5605 } else { 5606 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5607 } 5608 if (isbddc || isnn) { 5609 if (isbddc) { /* currently there's no API for this */ 5610 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5611 pcbddc->detect_disconnected = PETSC_TRUE; 5612 } 5613 if (pcbddc->coarsening_ratio > 1) { 5614 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5615 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5616 if (pcbddc->dbg_flag) { 5617 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5618 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5619 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5620 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5621 } 5622 } 5623 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5624 } else { 5625 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5626 coarse_mat = coarse_mat_is; 5627 } 5628 } else { 5629 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5630 } 5631 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5632 5633 /* propagate symmetry info of coarse matrix */ 5634 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5635 if (pc->pmat->symmetric_set) { 5636 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5637 } 5638 if (pc->pmat->hermitian_set) { 5639 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5640 } 5641 if (pc->pmat->spd_set) { 5642 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5643 } 5644 /* set operators */ 5645 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5646 if (pcbddc->dbg_flag) { 5647 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5648 } 5649 } else { /* processes non partecipating to coarse solver (if any) */ 5650 coarse_mat = 0; 5651 } 5652 ierr = PetscFree(isarray);CHKERRQ(ierr); 5653 #if 0 5654 { 5655 PetscViewer viewer; 5656 char filename[256]; 5657 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5658 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5659 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5660 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5661 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5662 } 5663 #endif 5664 5665 /* Compute coarse null space (special handling by BDDC only) */ 5666 #if 0 5667 if (pcbddc->NullSpace) { 5668 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5669 } 5670 #endif 5671 /* hack */ 5672 if (pcbddc->coarse_ksp) { 5673 Vec crhs,csol; 5674 5675 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5676 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5677 if (!csol) { 5678 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5679 } 5680 if (!crhs) { 5681 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5682 } 5683 } 5684 5685 /* compute null space for coarse solver if the benign trick has been requested */ 5686 if (pcbddc->benign_null) { 5687 5688 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5689 for (i=0;i<pcbddc->benign_n;i++) { 5690 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5691 } 5692 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5693 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5694 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5695 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5696 if (coarse_mat) { 5697 Vec nullv; 5698 PetscScalar *array,*array2; 5699 PetscInt nl; 5700 5701 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5702 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5703 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5704 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5705 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5706 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5707 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5708 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5709 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5710 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5711 } 5712 } 5713 5714 if (pcbddc->coarse_ksp) { 5715 PetscBool ispreonly; 5716 5717 if (CoarseNullSpace) { 5718 PetscBool isnull; 5719 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5720 if (0) { 5721 if (isbddc && !pcbddc->benign_saddle_point) { 5722 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5723 } else { 5724 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5725 } 5726 } else { 5727 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5728 } 5729 } 5730 /* setup coarse ksp */ 5731 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5732 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5733 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5734 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5735 KSP check_ksp; 5736 KSPType check_ksp_type; 5737 PC check_pc; 5738 Vec check_vec,coarse_vec; 5739 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5740 PetscInt its; 5741 PetscBool compute_eigs; 5742 PetscReal *eigs_r,*eigs_c; 5743 PetscInt neigs; 5744 const char *prefix; 5745 5746 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5747 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5748 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5749 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5750 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5751 if (ispreonly) { 5752 check_ksp_type = KSPPREONLY; 5753 compute_eigs = PETSC_FALSE; 5754 } else { 5755 check_ksp_type = KSPGMRES; 5756 compute_eigs = PETSC_TRUE; 5757 } 5758 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5759 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5760 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5761 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5762 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5763 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5764 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5765 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5766 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5767 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5768 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5769 /* create random vec */ 5770 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5771 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5772 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5773 if (CoarseNullSpace) { 5774 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5775 } 5776 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5777 /* solve coarse problem */ 5778 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5779 if (CoarseNullSpace) { 5780 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5781 } 5782 /* set eigenvalue estimation if preonly has not been requested */ 5783 if (compute_eigs) { 5784 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5785 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5786 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5787 lambda_max = eigs_r[neigs-1]; 5788 lambda_min = eigs_r[0]; 5789 if (pcbddc->use_coarse_estimates) { 5790 if (lambda_max>lambda_min) { 5791 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5792 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5793 } 5794 } 5795 } 5796 5797 /* check coarse problem residual error */ 5798 if (pcbddc->dbg_flag) { 5799 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5800 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5801 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5802 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5803 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5804 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5805 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5806 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5807 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5808 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5809 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5810 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5811 if (CoarseNullSpace) { 5812 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5813 } 5814 if (compute_eigs) { 5815 PetscReal lambda_max_s,lambda_min_s; 5816 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5817 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5818 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5819 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5820 for (i=0;i<neigs;i++) { 5821 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5822 } 5823 } 5824 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5825 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5826 } 5827 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5828 if (compute_eigs) { 5829 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5830 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5831 } 5832 } 5833 } 5834 /* print additional info */ 5835 if (pcbddc->dbg_flag) { 5836 /* waits until all processes reaches this point */ 5837 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5838 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5840 } 5841 5842 /* free memory */ 5843 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5844 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5845 PetscFunctionReturn(0); 5846 } 5847 5848 #undef __FUNCT__ 5849 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5850 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5851 { 5852 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5853 PC_IS* pcis = (PC_IS*)pc->data; 5854 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5855 IS subset,subset_mult,subset_n; 5856 PetscInt local_size,coarse_size=0; 5857 PetscInt *local_primal_indices=NULL; 5858 const PetscInt *t_local_primal_indices; 5859 PetscErrorCode ierr; 5860 5861 PetscFunctionBegin; 5862 /* Compute global number of coarse dofs */ 5863 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5864 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5865 } 5866 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5867 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5868 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5869 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5870 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5871 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5872 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5873 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5874 if (local_size != pcbddc->local_primal_size) { 5875 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5876 } 5877 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5878 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5879 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5880 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5881 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5882 5883 /* check numbering */ 5884 if (pcbddc->dbg_flag) { 5885 PetscScalar coarsesum,*array,*array2; 5886 PetscInt i; 5887 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5888 5889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5891 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5892 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5893 /* counter */ 5894 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5895 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5896 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5897 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5898 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5899 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5900 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5901 for (i=0;i<pcbddc->local_primal_size;i++) { 5902 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5903 } 5904 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5905 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5906 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5907 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5908 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5909 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5910 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5911 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5912 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5913 for (i=0;i<pcis->n;i++) { 5914 if (array[i] != 0.0 && array[i] != array2[i]) { 5915 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5916 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5917 set_error = PETSC_TRUE; 5918 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5919 } 5920 } 5921 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5922 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5924 for (i=0;i<pcis->n;i++) { 5925 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5926 } 5927 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5928 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5929 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5930 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5931 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5932 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5933 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5934 PetscInt *gidxs; 5935 5936 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5937 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5939 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5941 for (i=0;i<pcbddc->local_primal_size;i++) { 5942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5943 } 5944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5945 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5946 } 5947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5948 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5949 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5950 } 5951 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5952 /* get back data */ 5953 *coarse_size_n = coarse_size; 5954 *local_primal_indices_n = local_primal_indices; 5955 PetscFunctionReturn(0); 5956 } 5957 5958 #undef __FUNCT__ 5959 #define __FUNCT__ "PCBDDCGlobalToLocal" 5960 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5961 { 5962 IS localis_t; 5963 PetscInt i,lsize,*idxs,n; 5964 PetscScalar *vals; 5965 PetscErrorCode ierr; 5966 5967 PetscFunctionBegin; 5968 /* get indices in local ordering exploiting local to global map */ 5969 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5970 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5971 for (i=0;i<lsize;i++) vals[i] = 1.0; 5972 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5973 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5974 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5975 if (idxs) { /* multilevel guard */ 5976 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5977 } 5978 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5979 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5980 ierr = PetscFree(vals);CHKERRQ(ierr); 5981 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5982 /* now compute set in local ordering */ 5983 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5984 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5985 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5986 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5987 for (i=0,lsize=0;i<n;i++) { 5988 if (PetscRealPart(vals[i]) > 0.5) { 5989 lsize++; 5990 } 5991 } 5992 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5993 for (i=0,lsize=0;i<n;i++) { 5994 if (PetscRealPart(vals[i]) > 0.5) { 5995 idxs[lsize++] = i; 5996 } 5997 } 5998 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5999 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6000 *localis = localis_t; 6001 PetscFunctionReturn(0); 6002 } 6003 6004 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6005 #undef __FUNCT__ 6006 #define __FUNCT__ "PCBDDCMatMult_Private" 6007 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6008 { 6009 PCBDDCChange_ctx change_ctx; 6010 PetscErrorCode ierr; 6011 6012 PetscFunctionBegin; 6013 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6014 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6015 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6016 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6017 PetscFunctionReturn(0); 6018 } 6019 6020 #undef __FUNCT__ 6021 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6022 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6023 { 6024 PCBDDCChange_ctx change_ctx; 6025 PetscErrorCode ierr; 6026 6027 PetscFunctionBegin; 6028 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6029 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6030 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6031 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6032 PetscFunctionReturn(0); 6033 } 6034 6035 #undef __FUNCT__ 6036 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6037 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6038 { 6039 PC_IS *pcis=(PC_IS*)pc->data; 6040 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6041 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6042 Mat S_j; 6043 PetscInt *used_xadj,*used_adjncy; 6044 PetscBool free_used_adj; 6045 PetscErrorCode ierr; 6046 6047 PetscFunctionBegin; 6048 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6049 free_used_adj = PETSC_FALSE; 6050 if (pcbddc->sub_schurs_layers == -1) { 6051 used_xadj = NULL; 6052 used_adjncy = NULL; 6053 } else { 6054 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6055 used_xadj = pcbddc->mat_graph->xadj; 6056 used_adjncy = pcbddc->mat_graph->adjncy; 6057 } else if (pcbddc->computed_rowadj) { 6058 used_xadj = pcbddc->mat_graph->xadj; 6059 used_adjncy = pcbddc->mat_graph->adjncy; 6060 } else { 6061 PetscBool flg_row=PETSC_FALSE; 6062 const PetscInt *xadj,*adjncy; 6063 PetscInt nvtxs; 6064 6065 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6066 if (flg_row) { 6067 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6068 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6069 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6070 free_used_adj = PETSC_TRUE; 6071 } else { 6072 pcbddc->sub_schurs_layers = -1; 6073 used_xadj = NULL; 6074 used_adjncy = NULL; 6075 } 6076 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6077 } 6078 } 6079 6080 /* setup sub_schurs data */ 6081 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6082 if (!sub_schurs->schur_explicit) { 6083 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6084 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6085 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6086 } else { 6087 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6088 PetscBool isseqaij; 6089 PetscInt benign_n; 6090 6091 if (!pcbddc->use_vertices && reuse_solvers) { 6092 PetscInt n_vertices; 6093 6094 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6095 reuse_solvers = (PetscBool)!n_vertices; 6096 } 6097 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6098 if (!isseqaij) { 6099 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6100 if (matis->A == pcbddc->local_mat) { 6101 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6102 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6103 } else { 6104 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6105 } 6106 } 6107 if (!pcbddc->benign_change_explicit) { 6108 benign_n = pcbddc->benign_n; 6109 } else { 6110 benign_n = 0; 6111 } 6112 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6113 } 6114 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6115 6116 /* free adjacency */ 6117 if (free_used_adj) { 6118 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6119 } 6120 PetscFunctionReturn(0); 6121 } 6122 6123 #undef __FUNCT__ 6124 #define __FUNCT__ "PCBDDCInitSubSchurs" 6125 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6126 { 6127 PC_IS *pcis=(PC_IS*)pc->data; 6128 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6129 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6130 PCBDDCGraph graph; 6131 PetscErrorCode ierr; 6132 6133 PetscFunctionBegin; 6134 /* attach interface graph for determining subsets */ 6135 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6136 IS verticesIS,verticescomm; 6137 PetscInt vsize,*idxs; 6138 6139 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6140 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6141 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6143 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6144 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6145 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6146 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6147 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6148 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6149 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6150 /* 6151 if (pcbddc->dbg_flag) { 6152 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6153 } 6154 */ 6155 } else { 6156 graph = pcbddc->mat_graph; 6157 } 6158 6159 /* sub_schurs init */ 6160 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6161 6162 /* free graph struct */ 6163 if (pcbddc->sub_schurs_rebuild) { 6164 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6165 } 6166 PetscFunctionReturn(0); 6167 } 6168 6169 #undef __FUNCT__ 6170 #define __FUNCT__ "PCBDDCCheckOperator" 6171 PetscErrorCode PCBDDCCheckOperator(PC pc) 6172 { 6173 PC_IS *pcis=(PC_IS*)pc->data; 6174 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6175 PetscErrorCode ierr; 6176 6177 PetscFunctionBegin; 6178 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6179 IS zerodiag = NULL; 6180 Mat S_j,B0_B=NULL; 6181 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6182 PetscScalar *p0_check,*array,*array2; 6183 PetscReal norm; 6184 PetscInt i; 6185 6186 /* B0 and B0_B */ 6187 if (zerodiag) { 6188 IS dummy; 6189 6190 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6191 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6192 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6193 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6194 } 6195 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6196 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6197 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6198 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6199 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6200 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6201 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6202 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6203 /* S_j */ 6204 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6205 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6206 6207 /* mimic vector in \widetilde{W}_\Gamma */ 6208 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6209 /* continuous in primal space */ 6210 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6211 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6212 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6213 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6214 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6215 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6216 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6217 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6218 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6219 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6220 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6221 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6222 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6223 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6224 6225 /* assemble rhs for coarse problem */ 6226 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6227 /* local with Schur */ 6228 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6229 if (zerodiag) { 6230 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6231 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6232 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6233 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6234 } 6235 /* sum on primal nodes the local contributions */ 6236 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6237 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6238 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6239 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6240 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6241 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6242 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6243 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6244 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6246 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6247 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6248 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6249 /* scale primal nodes (BDDC sums contibutions) */ 6250 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6251 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6252 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6253 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6254 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6255 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6256 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6257 /* global: \widetilde{B0}_B w_\Gamma */ 6258 if (zerodiag) { 6259 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6260 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6261 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6262 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6263 } 6264 /* BDDC */ 6265 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6266 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6267 6268 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6269 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6270 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6271 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6272 for (i=0;i<pcbddc->benign_n;i++) { 6273 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6274 } 6275 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6276 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6277 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6278 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6279 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6280 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6281 } 6282 PetscFunctionReturn(0); 6283 } 6284