1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 cum2 = cum; 1001 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1002 } 1003 1004 if (mss) { /* multilevel */ 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1006 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1007 } 1008 1009 thresh = pcbddc->adaptive_threshold; 1010 for (i=0;i<sub_schurs->n_subs;i++) { 1011 const PetscInt *idxs; 1012 PetscReal infty = PETSC_MAX_REAL; 1013 PetscInt j,subset_size,eigs_start = 0; 1014 PetscBLASInt B_N; 1015 PetscBool same_data = PETSC_FALSE; 1016 1017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1018 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1019 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1020 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1021 if (sub_schurs->is_hermitian) { 1022 PetscInt j,k; 1023 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1024 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1026 } 1027 for (j=0;j<subset_size;j++) { 1028 for (k=j;k<subset_size;k++) { 1029 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1030 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1031 } 1032 } 1033 } else { 1034 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1036 } 1037 } else { 1038 S = Sarray + cumarray; 1039 St = Starray + cumarray; 1040 } 1041 1042 /* see if we can save some work */ 1043 if (sub_schurs->n_subs == 1) { 1044 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1045 } 1046 1047 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1048 B_neigs = 0; 1049 } else { 1050 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1051 PetscBLASInt B_itype = 1; 1052 PetscBLASInt B_IL, B_IU; 1053 PetscReal eps = -1.0; /* dlamch? */ 1054 PetscInt nmin_s; 1055 1056 if (pcbddc->dbg_flag) { 1057 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1058 } 1059 1060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1061 if (thresh > 1.+PETSC_SMALL) { 1062 1063 /* ask for eigenvalues larger than thresh */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1066 #else 1067 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1068 #endif 1069 } else { 1070 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1071 B_IL = 1; 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1074 #else 1075 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1076 #endif 1077 } 1078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1079 if (B_ierr) { 1080 if (B_ierr < 0 ) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1082 } else if (B_ierr <= B_N) { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1084 } else { 1085 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1086 } 1087 } 1088 1089 if (B_neigs > nmax) { 1090 if (pcbddc->dbg_flag) { 1091 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1092 } 1093 eigs_start = B_neigs -nmax; 1094 B_neigs = nmax; 1095 } 1096 1097 nmin_s = PetscMin(nmin,B_N); 1098 if (B_neigs < nmin_s) { 1099 PetscBLASInt B_neigs2; 1100 1101 B_IU = B_N - B_neigs; 1102 B_IL = B_N - nmin_s + 1; 1103 if (pcbddc->dbg_flag) { 1104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1105 } 1106 if (sub_schurs->is_hermitian) { 1107 PetscInt j; 1108 for (j=0;j<subset_size;j++) { 1109 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1110 } 1111 for (j=0;j<subset_size;j++) { 1112 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1113 } 1114 } else { 1115 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1116 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1117 } 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_COMPLEX) 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1121 #else 1122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1123 #endif 1124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1125 B_neigs += B_neigs2; 1126 } 1127 if (B_ierr) { 1128 if (B_ierr < 0 ) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1130 } else if (B_ierr <= B_N) { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1132 } else { 1133 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 } 1136 if (pcbddc->dbg_flag) { 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1138 for (j=0;j<B_neigs;j++) { 1139 if (eigs[j] == 0.0) { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1141 } else { 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1143 } 1144 } 1145 } 1146 } else { 1147 /* TODO */ 1148 } 1149 } 1150 maxneigs = PetscMax(B_neigs,maxneigs); 1151 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1152 if (B_neigs) { 1153 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1154 1155 if (pcbddc->dbg_flag > 1) { 1156 PetscInt ii; 1157 for (ii=0;ii<B_neigs;ii++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1159 for (j=0;j<B_N;j++) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1162 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1166 #endif 1167 } 1168 } 1169 } 1170 #if 0 1171 for (j=0;j<B_neigs;j++) { 1172 PetscBLASInt Blas_N,Blas_one = 1.0; 1173 PetscScalar norm; 1174 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1175 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1176 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1177 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1178 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1179 } else { 1180 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1181 } 1182 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 } 1184 #endif 1185 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1188 cum++; 1189 } 1190 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1191 /* shift for next computation */ 1192 cumarray += subset_size*subset_size; 1193 } 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1196 } 1197 1198 if (mss) { 1199 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1200 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1201 /* destroy matrices (junk) */ 1202 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1203 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1204 } 1205 if (allocated_S_St) { 1206 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1207 } 1208 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = PetscFree(rwork);CHKERRQ(ierr); 1211 #endif 1212 if (pcbddc->dbg_flag) { 1213 PetscInt maxneigs_r; 1214 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCSetUpSolvers" 1222 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1223 { 1224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1225 PetscScalar *coarse_submat_vals; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1230 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1231 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1232 1233 /* Setup local neumann solver ksp_R */ 1234 /* PCBDDCSetUpLocalScatters should be called first! */ 1235 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1236 1237 /* Change global null space passed in by the user if change of basis has been requested */ 1238 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1239 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1240 } 1241 1242 /* 1243 Setup local correction and local part of coarse basis. 1244 Gives back the dense local part of the coarse matrix in column major ordering 1245 */ 1246 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1247 1248 /* Compute total number of coarse nodes and setup coarse solver */ 1249 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* free */ 1252 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCResetCustomization" 1258 PetscErrorCode PCBDDCResetCustomization(PC pc) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1265 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1266 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1267 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1270 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1271 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1272 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1275 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "PCBDDCResetTopography" 1281 PetscErrorCode PCBDDCResetTopography(PC pc) 1282 { 1283 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1284 PetscInt i; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1291 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1292 for (i=0;i<pcbddc->n_local_subs;i++) { 1293 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1296 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCResetSolvers" 1302 PetscErrorCode PCBDDCResetSolvers(PC pc) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1309 if (pcbddc->coarse_phi_B) { 1310 PetscScalar *array; 1311 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1312 ierr = PetscFree(array);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1316 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1318 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1319 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1328 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1329 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1330 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1332 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1333 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1335 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1336 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1338 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1341 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1342 if (pcbddc->benign_zerodiag_subs) { 1343 PetscInt i; 1344 for (i=0;i<pcbddc->benign_n;i++) { 1345 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1355 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1356 { 1357 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1358 PC_IS *pcis = (PC_IS*)pc->data; 1359 VecType impVecType; 1360 PetscInt n_constraints,n_R,old_size; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (!pcbddc->ConstraintMatrix) { 1365 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1366 } 1367 /* get sizes */ 1368 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1369 n_R = pcis->n - pcbddc->n_vertices; 1370 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1371 /* local work vectors (try to avoid unneeded work)*/ 1372 /* R nodes */ 1373 old_size = -1; 1374 if (pcbddc->vec1_R) { 1375 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1376 } 1377 if (n_R != old_size) { 1378 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1379 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1380 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1381 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1382 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1383 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1384 } 1385 /* local primal dofs */ 1386 old_size = -1; 1387 if (pcbddc->vec1_P) { 1388 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1389 } 1390 if (pcbddc->local_primal_size != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1395 } 1396 /* local explicit constraints */ 1397 old_size = -1; 1398 if (pcbddc->vec1_C) { 1399 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1400 } 1401 if (n_constraints && n_constraints != old_size) { 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1404 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1405 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpCorrection" 1412 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1413 { 1414 PetscErrorCode ierr; 1415 /* pointers to pcis and pcbddc */ 1416 PC_IS* pcis = (PC_IS*)pc->data; 1417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1418 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1419 /* submatrices of local problem */ 1420 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1421 /* submatrices of benign trick */ 1422 Mat B0_V = NULL; 1423 /* submatrices of local coarse problem */ 1424 Mat S_VV,S_CV,S_VC,S_CC; 1425 /* working matrices */ 1426 Mat C_CR; 1427 /* additional working stuff */ 1428 PC pc_R; 1429 Mat F; 1430 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1431 1432 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1433 PetscScalar *work; 1434 PetscInt *idx_V_B; 1435 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1436 PetscInt i,n_R,n_D,n_B; 1437 1438 /* some shortcuts to scalars */ 1439 PetscScalar one=1.0,m_one=-1.0; 1440 1441 PetscFunctionBegin; 1442 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1444 } 1445 1446 /* Set Non-overlapping dimensions */ 1447 n_vertices = pcbddc->n_vertices; 1448 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1449 n_B = pcis->n_B; 1450 n_D = pcis->n - n_B; 1451 n_R = pcis->n - n_vertices; 1452 1453 /* vertices in boundary numbering */ 1454 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1456 if (i != n_vertices) { 1457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1458 } 1459 1460 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1461 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1463 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1465 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1467 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 1471 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1476 lda_rhs = n_R; 1477 need_benign_correction = PETSC_FALSE; 1478 if (isLU || isILU || isCHOL) { 1479 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1480 } else if (sub_schurs->reuse_solver) { 1481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1482 MatFactorType type; 1483 1484 F = reuse_solver->F; 1485 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1486 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1487 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1488 need_benign_correction = !!reuse_solver->benign_n; 1489 } else { 1490 F = NULL; 1491 } 1492 1493 /* allocate workspace */ 1494 n = 0; 1495 if (n_constraints) { 1496 n += lda_rhs*n_constraints; 1497 } 1498 if (n_vertices) { 1499 n = PetscMax(2*lda_rhs*n_vertices,n); 1500 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1501 } 1502 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1503 1504 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1505 if (n_constraints) { 1506 Mat M1,M2,M3; 1507 Mat auxmat; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PetscScalar *marr; 1540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1541 1542 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1543 for (i=0;i<n_constraints;i++) { 1544 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1545 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1549 } 1550 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1551 if (need_benign_correction) { 1552 PetscScalar *marr; 1553 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1554 1555 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 for (i=0;i<n_constraints;i++) { 1557 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1558 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 } 1563 ierr = MatDestroy(&B);CHKERRQ(ierr); 1564 } else { 1565 PetscScalar *marr; 1566 1567 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 for (i=0;i<n_constraints;i++) { 1569 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1574 } 1575 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->switch_static) { 1578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1581 for (i=0;i<n_constraints;i++) { 1582 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1583 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1588 } 1589 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1591 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1592 } else { 1593 if (lda_rhs != n_R) { 1594 IS dummy; 1595 1596 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1601 pcbddc->local_auxmat2 = local_auxmat2_R; 1602 } 1603 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1604 } 1605 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1606 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1607 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1608 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1609 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1610 if (isCHOL) { 1611 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1612 } else { 1613 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1614 } 1615 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1616 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1618 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1619 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1620 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1621 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1622 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1623 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1624 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1625 } 1626 /* Get submatrices from subdomain matrix */ 1627 if (pcbddc->benign_n) { 1628 IS dummy; 1629 Mat B0_R; 1630 PetscReal norm; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1634 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1635 if (norm > PETSC_SMALL) { 1636 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1637 } 1638 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1639 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1640 } 1641 1642 if (n_vertices) { 1643 IS is_aux; 1644 1645 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1646 IS tis; 1647 1648 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1649 ierr = ISSort(tis);CHKERRQ(ierr); 1650 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 } else { 1653 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1654 } 1655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1656 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1658 if (pcbddc->benign_n) { 1659 IS dummy; 1660 1661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1663 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1664 } 1665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1666 } 1667 1668 /* Matrix of coarse basis functions (local) */ 1669 if (pcbddc->coarse_phi_B) { 1670 PetscInt on_B,on_primal,on_D=n_D; 1671 if (pcbddc->coarse_phi_D) { 1672 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1673 } 1674 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1675 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1676 PetscScalar *marray; 1677 1678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1679 ierr = PetscFree(marray);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1681 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1684 } 1685 } 1686 1687 if (!pcbddc->coarse_phi_B) { 1688 PetscScalar *marray; 1689 1690 n = n_B*pcbddc->local_primal_size; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n += n_D*pcbddc->local_primal_size; 1693 } 1694 if (!pcbddc->symmetric_primal) { 1695 n *= 2; 1696 } 1697 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1702 n += n_D*pcbddc->local_primal_size; 1703 } 1704 if (!pcbddc->symmetric_primal) { 1705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 n = n_B*pcbddc->local_primal_size; 1708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 } 1710 } else { 1711 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1714 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1715 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1716 } 1717 } 1718 } 1719 1720 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1721 p0_lidx_I = NULL; 1722 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1723 const PetscInt *idxs; 1724 1725 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1727 for (i=0;i<pcbddc->benign_n;i++) { 1728 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1729 } 1730 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1731 } 1732 1733 /* vertices */ 1734 if (n_vertices) { 1735 1736 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1737 1738 if (n_R) { 1739 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1740 PetscBLASInt B_N,B_one = 1; 1741 PetscScalar *x,*y; 1742 PetscBool isseqaij; 1743 1744 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1745 if (need_benign_correction) { 1746 ISLocalToGlobalMapping RtoN; 1747 IS is_p0; 1748 PetscInt *idxs_p0,n; 1749 1750 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1751 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1752 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1753 if (n != pcbddc->benign_n) { 1754 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1755 } 1756 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1757 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1758 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1759 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1760 } 1761 1762 if (lda_rhs == n_R) { 1763 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1764 } else { 1765 PetscScalar *av,*array; 1766 const PetscInt *xadj,*adjncy; 1767 PetscInt n; 1768 PetscBool flg_row; 1769 1770 array = work+lda_rhs*n_vertices; 1771 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1772 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1774 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1775 for (i=0;i<n;i++) { 1776 PetscInt j; 1777 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1778 } 1779 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1782 } 1783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1784 if (F) { 1785 if (need_benign_correction) { 1786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1787 PetscScalar *marr; 1788 1789 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1790 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1791 1792 | 0 0 0 | (V) 1793 L = | 0 0 -1 | (P-p0) 1794 | 0 0 -1 | (p0) 1795 1796 */ 1797 for (i=0;i<reuse_solver->benign_n;i++) { 1798 const PetscScalar *vals; 1799 const PetscInt *idxs,*idxs_zero; 1800 PetscInt n,j,nz; 1801 1802 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1804 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1805 for (j=0;j<n;j++) { 1806 PetscScalar val = vals[j]; 1807 PetscInt k,col = idxs[j]; 1808 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1809 } 1810 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1812 } 1813 /* need to correct the rhs */ 1814 for (i=0;i<n_vertices;i++) { 1815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1816 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1818 } 1819 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1820 } 1821 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1822 /* need to correct the solution */ 1823 if (need_benign_correction) { 1824 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1825 PetscScalar *marr; 1826 1827 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1828 for (i=0;i<n_vertices;i++) { 1829 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1830 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 } 1835 } else { 1836 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1840 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1842 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1843 } 1844 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1845 } 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 /* S_VV and S_CV */ 1848 if (n_constraints) { 1849 Mat B; 1850 1851 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1852 for (i=0;i<n_vertices;i++) { 1853 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1854 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1859 } 1860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1861 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1862 ierr = MatDestroy(&B);CHKERRQ(ierr); 1863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1864 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1865 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 } 1870 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1871 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1872 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1873 } 1874 if (lda_rhs != n_R) { 1875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1878 } 1879 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1880 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr,*sums; 1884 1885 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1886 ierr = MatDenseGetArray(S_VVt,&marr); 1887 for (i=0;i<reuse_solver->benign_n;i++) { 1888 const PetscScalar *vals; 1889 const PetscInt *idxs,*idxs_zero; 1890 PetscInt n,j,nz; 1891 1892 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1893 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1894 for (j=0;j<n_vertices;j++) { 1895 PetscInt k; 1896 sums[j] = 0.; 1897 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1898 } 1899 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1900 for (j=0;j<n;j++) { 1901 PetscScalar val = vals[j]; 1902 PetscInt k; 1903 for (k=0;k<n_vertices;k++) { 1904 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1905 } 1906 } 1907 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1908 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1909 } 1910 ierr = PetscFree(sums);CHKERRQ(ierr); 1911 ierr = MatDenseRestoreArray(S_VVt,&marr); 1912 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1913 } 1914 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1919 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1921 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1922 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1925 } 1926 for (i=0;i<pcbddc->benign_n;i++) { 1927 const PetscScalar *vals; 1928 const PetscInt *idxs; 1929 PetscInt n,j,primal_idx; 1930 1931 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1932 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1933 for (j=0;j<n;j++) { 1934 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1935 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1936 } 1937 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 } 1939 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1940 1941 /* coarse basis functions */ 1942 for (i=0;i<n_vertices;i++) { 1943 PetscScalar *y; 1944 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 y[n_B*i+idx_V_B[i]] = 1.0; 1951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1952 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1953 1954 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1955 PetscInt j; 1956 1957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1962 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 } 1965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1966 } 1967 /* if n_R == 0 the object is not destroyed */ 1968 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1969 } 1970 1971 if (n_constraints) { 1972 Mat B; 1973 1974 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1975 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1976 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1977 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1978 if (n_vertices) { 1979 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1980 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1981 } else { 1982 Mat S_VCt; 1983 1984 if (lda_rhs != n_R) { 1985 ierr = MatDestroy(&B);CHKERRQ(ierr); 1986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1988 } 1989 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1992 } 1993 } 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<n_constraints;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2006 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2007 PetscInt j; 2008 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2014 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 } 2017 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2018 } 2019 } 2020 if (n_constraints) { 2021 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2022 } 2023 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2024 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2025 2026 /* compute other basis functions for non-symmetric problems */ 2027 if (!pcbddc->symmetric_primal) { 2028 Mat B_V=NULL,B_C=NULL; 2029 PetscScalar *marray; 2030 2031 if (n_constraints) { 2032 Mat S_CCT,C_CRT; 2033 2034 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2035 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2036 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2037 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2038 if (n_vertices) { 2039 Mat S_VCT; 2040 2041 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2042 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2043 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2044 } 2045 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2046 } 2047 if (n_vertices && n_R) { 2048 PetscScalar *av,*marray; 2049 const PetscInt *xadj,*adjncy; 2050 PetscInt n; 2051 PetscBool flg_row; 2052 2053 /* B_V = B_V - A_VR^T */ 2054 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2055 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2056 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2057 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2058 for (i=0;i<n;i++) { 2059 PetscInt j; 2060 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2061 } 2062 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2063 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2064 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2065 } 2066 2067 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2068 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2071 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2072 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2074 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2075 } 2076 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2078 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2079 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2080 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2081 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2083 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2084 } 2085 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2086 /* coarse basis functions */ 2087 for (i=0;i<pcbddc->local_primal_size;i++) { 2088 PetscScalar *y; 2089 2090 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2091 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2092 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 if (i<n_vertices) { 2096 y[n_B*i+idx_V_B[i]] = 1.0; 2097 } 2098 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2100 2101 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2102 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2103 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2104 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2107 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2108 } 2109 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2110 } 2111 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2112 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2113 } 2114 /* free memory */ 2115 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2116 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2117 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2118 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2119 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2120 ierr = PetscFree(work);CHKERRQ(ierr); 2121 if (n_vertices) { 2122 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2123 } 2124 if (n_constraints) { 2125 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2126 } 2127 /* Checking coarse_sub_mat and coarse basis functios */ 2128 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2129 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2130 if (pcbddc->dbg_flag) { 2131 Mat coarse_sub_mat; 2132 Mat AUXMAT,TM1,TM2,TM3,TM4; 2133 Mat coarse_phi_D,coarse_phi_B; 2134 Mat coarse_psi_D,coarse_psi_B; 2135 Mat A_II,A_BB,A_IB,A_BI; 2136 Mat C_B,CPHI; 2137 IS is_dummy; 2138 Vec mones; 2139 MatType checkmattype=MATSEQAIJ; 2140 PetscReal real_value; 2141 2142 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2143 Mat A; 2144 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2145 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2146 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2147 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2148 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2149 ierr = MatDestroy(&A);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2152 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2153 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2154 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2155 } 2156 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2157 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2158 if (!pcbddc->symmetric_primal) { 2159 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2160 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2161 } 2162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2163 2164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2167 if (!pcbddc->symmetric_primal) { 2168 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2169 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2171 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2172 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2174 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2178 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2182 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2183 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2184 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2185 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2186 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2187 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2188 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2189 } 2190 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2191 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2192 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2193 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2194 if (pcbddc->benign_n) { 2195 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2196 PetscScalar *data,*data2; 2197 PetscInt j; 2198 2199 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2200 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2201 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2202 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2203 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2204 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2205 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2206 for (j=0;j<pcbddc->benign_n;j++) { 2207 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2208 for (i=0;i<pcbddc->local_primal_size;i++) { 2209 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2210 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2211 } 2212 } 2213 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2214 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2215 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2216 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2217 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2218 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2219 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2220 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2221 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2222 } 2223 #if 0 2224 { 2225 PetscViewer viewer; 2226 char filename[256]; 2227 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2228 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2229 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2230 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2231 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2232 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2233 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2235 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2236 if (pcbddc->coarse_phi_D) { 2237 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2238 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2239 } 2240 if (pcbddc->coarse_psi_B) { 2241 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2242 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2243 } 2244 if (pcbddc->coarse_phi_B) { 2245 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2246 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2247 } 2248 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2249 } 2250 #endif 2251 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2252 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2255 2256 /* check constraints */ 2257 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2258 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2259 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2260 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2261 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2262 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2263 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2264 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2266 if (!pcbddc->symmetric_primal) { 2267 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2268 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2269 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2270 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2272 } 2273 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2274 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2275 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2276 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2277 } 2278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2279 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2280 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2281 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2282 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2283 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2284 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2285 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2287 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2288 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2289 if (!pcbddc->symmetric_primal) { 2290 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2291 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2292 } 2293 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2294 } 2295 /* get back data */ 2296 *coarse_submat_vals_n = coarse_submat_vals; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2302 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2303 { 2304 Mat *work_mat; 2305 IS isrow_s,iscol_s; 2306 PetscBool rsorted,csorted; 2307 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2312 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2313 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2314 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2315 2316 if (!rsorted) { 2317 const PetscInt *idxs; 2318 PetscInt *idxs_sorted,i; 2319 2320 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2321 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2322 for (i=0;i<rsize;i++) { 2323 idxs_perm_r[i] = i; 2324 } 2325 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2326 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2327 for (i=0;i<rsize;i++) { 2328 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2329 } 2330 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2331 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2332 } else { 2333 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2334 isrow_s = isrow; 2335 } 2336 2337 if (!csorted) { 2338 if (isrow == iscol) { 2339 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2340 iscol_s = isrow_s; 2341 } else { 2342 const PetscInt *idxs; 2343 PetscInt *idxs_sorted,i; 2344 2345 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2346 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2347 for (i=0;i<csize;i++) { 2348 idxs_perm_c[i] = i; 2349 } 2350 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2351 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2352 for (i=0;i<csize;i++) { 2353 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2354 } 2355 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2356 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2357 } 2358 } else { 2359 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2360 iscol_s = iscol; 2361 } 2362 2363 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2364 2365 if (!rsorted || !csorted) { 2366 Mat new_mat; 2367 IS is_perm_r,is_perm_c; 2368 2369 if (!rsorted) { 2370 PetscInt *idxs_r,i; 2371 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2372 for (i=0;i<rsize;i++) { 2373 idxs_r[idxs_perm_r[i]] = i; 2374 } 2375 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2376 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2377 } else { 2378 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2379 } 2380 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2381 2382 if (!csorted) { 2383 if (isrow_s == iscol_s) { 2384 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2385 is_perm_c = is_perm_r; 2386 } else { 2387 PetscInt *idxs_c,i; 2388 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2389 for (i=0;i<csize;i++) { 2390 idxs_c[idxs_perm_c[i]] = i; 2391 } 2392 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2394 } 2395 } else { 2396 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2397 } 2398 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2399 2400 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2401 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2402 work_mat[0] = new_mat; 2403 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2404 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2405 } 2406 2407 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2408 *B = work_mat[0]; 2409 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2410 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2411 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2417 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2418 { 2419 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2420 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2421 Mat new_mat; 2422 IS is_local,is_global; 2423 PetscInt local_size; 2424 PetscBool isseqaij; 2425 PetscErrorCode ierr; 2426 2427 PetscFunctionBegin; 2428 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2429 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2430 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2431 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2432 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2433 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2434 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2435 2436 /* check */ 2437 if (pcbddc->dbg_flag) { 2438 Vec x,x_change; 2439 PetscReal error; 2440 2441 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2442 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2443 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2444 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2445 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2446 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2447 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2449 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2450 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2453 ierr = VecDestroy(&x);CHKERRQ(ierr); 2454 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2455 } 2456 2457 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2458 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2459 if (isseqaij) { 2460 Mat M; 2461 2462 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2463 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2464 ierr = MatDestroy(&M);CHKERRQ(ierr); 2465 } else { 2466 Mat work_mat,M; 2467 2468 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2469 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2470 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2471 ierr = MatDestroy(&M);CHKERRQ(ierr); 2472 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2473 } 2474 if (matis->A->symmetric_set) { 2475 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2476 #if !defined(PETSC_USE_COMPLEX) 2477 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2478 #endif 2479 } 2480 /* 2481 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2482 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2483 */ 2484 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2490 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2491 { 2492 PC_IS* pcis = (PC_IS*)(pc->data); 2493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2494 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2495 PetscInt *idx_R_local=NULL; 2496 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2497 PetscInt vbs,bs; 2498 PetscBT bitmask=NULL; 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 /* 2503 No need to setup local scatters if 2504 - primal space is unchanged 2505 AND 2506 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2507 AND 2508 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2509 */ 2510 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2511 PetscFunctionReturn(0); 2512 } 2513 /* destroy old objects */ 2514 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2515 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2516 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2517 /* Set Non-overlapping dimensions */ 2518 n_B = pcis->n_B; 2519 n_D = pcis->n - n_B; 2520 n_vertices = pcbddc->n_vertices; 2521 2522 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2523 2524 /* create auxiliary bitmask and allocate workspace */ 2525 if (!sub_schurs->reuse_solver) { 2526 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2527 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2528 for (i=0;i<n_vertices;i++) { 2529 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2530 } 2531 2532 for (i=0, n_R=0; i<pcis->n; i++) { 2533 if (!PetscBTLookup(bitmask,i)) { 2534 idx_R_local[n_R++] = i; 2535 } 2536 } 2537 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2538 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2539 2540 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2541 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2542 } 2543 2544 /* Block code */ 2545 vbs = 1; 2546 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2547 if (bs>1 && !(n_vertices%bs)) { 2548 PetscBool is_blocked = PETSC_TRUE; 2549 PetscInt *vary; 2550 if (!sub_schurs->reuse_solver) { 2551 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2552 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2553 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2554 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2555 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2556 for (i=0; i<pcis->n/bs; i++) { 2557 if (vary[i]!=0 && vary[i]!=bs) { 2558 is_blocked = PETSC_FALSE; 2559 break; 2560 } 2561 } 2562 ierr = PetscFree(vary);CHKERRQ(ierr); 2563 } else { 2564 /* Verify directly the R set */ 2565 for (i=0; i<n_R/bs; i++) { 2566 PetscInt j,node=idx_R_local[bs*i]; 2567 for (j=1; j<bs; j++) { 2568 if (node != idx_R_local[bs*i+j]-j) { 2569 is_blocked = PETSC_FALSE; 2570 break; 2571 } 2572 } 2573 } 2574 } 2575 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2576 vbs = bs; 2577 for (i=0;i<n_R/vbs;i++) { 2578 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2579 } 2580 } 2581 } 2582 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2583 if (sub_schurs->reuse_solver) { 2584 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2585 2586 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2587 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2588 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2589 reuse_solver->is_R = pcbddc->is_R_local; 2590 } else { 2591 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2592 } 2593 2594 /* print some info if requested */ 2595 if (pcbddc->dbg_flag) { 2596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2598 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2602 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2603 } 2604 2605 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2606 if (!sub_schurs->reuse_solver) { 2607 IS is_aux1,is_aux2; 2608 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2609 2610 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2611 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2612 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 for (i=0; i<n_D; i++) { 2615 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2616 } 2617 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2618 for (i=0, j=0; i<n_R; i++) { 2619 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2620 aux_array1[j++] = i; 2621 } 2622 } 2623 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2624 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2625 for (i=0, j=0; i<n_B; i++) { 2626 if (!PetscBTLookup(bitmask,is_indices[i])) { 2627 aux_array2[j++] = i; 2628 } 2629 } 2630 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2632 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2633 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2634 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2635 2636 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2637 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2638 for (i=0, j=0; i<n_R; i++) { 2639 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2640 aux_array1[j++] = i; 2641 } 2642 } 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2644 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2646 } 2647 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2649 } else { 2650 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2651 IS tis; 2652 PetscInt schur_size; 2653 2654 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2655 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2657 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2658 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2659 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2660 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2661 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2662 } 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2670 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2671 { 2672 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2673 PC_IS *pcis = (PC_IS*)pc->data; 2674 PC pc_temp; 2675 Mat A_RR; 2676 MatReuse reuse; 2677 PetscScalar m_one = -1.0; 2678 PetscReal value; 2679 PetscInt n_D,n_R; 2680 PetscBool use_exact,use_exact_reduced,issbaij; 2681 PetscErrorCode ierr; 2682 /* prefixes stuff */ 2683 char dir_prefix[256],neu_prefix[256],str_level[16]; 2684 size_t len; 2685 2686 PetscFunctionBegin; 2687 2688 /* compute prefixes */ 2689 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2690 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2691 if (!pcbddc->current_level) { 2692 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2693 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2694 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2695 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2696 } else { 2697 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2698 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2699 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2700 len -= 15; /* remove "pc_bddc_coarse_" */ 2701 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2702 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2703 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2704 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2705 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2706 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2707 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2708 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2709 } 2710 2711 /* DIRICHLET PROBLEM */ 2712 if (dirichlet) { 2713 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2714 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2715 if (!sub_schurs->reuse_solver) { 2716 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2717 } 2718 if (pcbddc->dbg_flag) { 2719 Mat A_IIn; 2720 2721 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2722 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2723 pcis->A_II = A_IIn; 2724 } 2725 } 2726 if (pcbddc->local_mat->symmetric_set) { 2727 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2728 } 2729 /* Matrix for Dirichlet problem is pcis->A_II */ 2730 n_D = pcis->n - pcis->n_B; 2731 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2732 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2733 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2734 /* default */ 2735 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2736 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2737 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2738 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2739 if (issbaij) { 2740 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2741 } else { 2742 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2743 } 2744 /* Allow user's customization */ 2745 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2746 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2747 } 2748 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2749 if (sub_schurs->reuse_solver) { 2750 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2751 2752 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2753 } 2754 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2755 if (!n_D) { 2756 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2757 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2758 } 2759 /* Set Up KSP for Dirichlet problem of BDDC */ 2760 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2761 /* set ksp_D into pcis data */ 2762 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2763 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2764 pcis->ksp_D = pcbddc->ksp_D; 2765 } 2766 2767 /* NEUMANN PROBLEM */ 2768 A_RR = 0; 2769 if (neumann) { 2770 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2771 PetscInt ibs,mbs; 2772 PetscBool issbaij; 2773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2774 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2775 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2776 if (pcbddc->ksp_R) { /* already created ksp */ 2777 PetscInt nn_R; 2778 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2779 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2780 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2781 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2782 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2783 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2784 reuse = MAT_INITIAL_MATRIX; 2785 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2786 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2787 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2788 reuse = MAT_INITIAL_MATRIX; 2789 } else { /* safe to reuse the matrix */ 2790 reuse = MAT_REUSE_MATRIX; 2791 } 2792 } 2793 /* last check */ 2794 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2795 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2796 reuse = MAT_INITIAL_MATRIX; 2797 } 2798 } else { /* first time, so we need to create the matrix */ 2799 reuse = MAT_INITIAL_MATRIX; 2800 } 2801 /* extract A_RR */ 2802 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2803 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2804 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2805 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2806 if (matis->A == pcbddc->local_mat) { 2807 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2808 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2809 } else { 2810 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2811 } 2812 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2813 if (matis->A == pcbddc->local_mat) { 2814 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2815 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2816 } else { 2817 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2818 } 2819 } 2820 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2821 if (pcbddc->local_mat->symmetric_set) { 2822 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2823 } 2824 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2825 Mat A_RRn; 2826 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2827 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2828 A_RR = A_RRn; 2829 } 2830 if (!pcbddc->ksp_R) { /* create object if not present */ 2831 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2832 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2833 /* default */ 2834 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2835 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2836 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2837 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2838 if (issbaij) { 2839 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2840 } else { 2841 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2842 } 2843 /* Allow user's customization */ 2844 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2845 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2846 } 2847 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2848 if (!n_R) { 2849 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2850 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2851 } 2852 /* Reuse solver if it is present */ 2853 if (sub_schurs->reuse_solver) { 2854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2855 2856 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2857 } 2858 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2859 /* Set Up KSP for Neumann problem of BDDC */ 2860 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2861 } 2862 /* free Neumann problem's matrix */ 2863 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2864 2865 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2866 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2867 if (pcbddc->dbg_flag) { 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2871 } 2872 if (dirichlet) { /* Dirichlet */ 2873 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2874 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2875 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2876 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2877 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2878 /* need to be adapted? */ 2879 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2880 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2881 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2882 /* print info */ 2883 if (pcbddc->dbg_flag) { 2884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2886 } 2887 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2888 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2889 } 2890 } 2891 if (neumann) { /* Neumann */ 2892 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2893 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2894 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2896 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2897 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2898 /* need to be adapted? */ 2899 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2900 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2901 /* print info */ 2902 if (pcbddc->dbg_flag) { 2903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2905 } 2906 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2907 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2908 } 2909 } 2910 } 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2916 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2917 { 2918 PetscErrorCode ierr; 2919 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2920 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2921 2922 PetscFunctionBegin; 2923 if (!sub_schurs->reuse_solver) { 2924 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2925 } 2926 if (!pcbddc->switch_static) { 2927 if (applytranspose && pcbddc->local_auxmat1) { 2928 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2929 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2930 } 2931 if (!sub_schurs->reuse_solver) { 2932 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 } else { 2935 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2936 2937 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2938 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 } 2940 } else { 2941 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2942 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2943 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 if (applytranspose && pcbddc->local_auxmat1) { 2946 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2947 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2948 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950 } 2951 } 2952 if (!sub_schurs->reuse_solver) { 2953 if (applytranspose) { 2954 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2955 } else { 2956 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2957 } 2958 } else { 2959 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2960 2961 if (applytranspose) { 2962 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2963 } else { 2964 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2965 } 2966 } 2967 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2968 if (!pcbddc->switch_static) { 2969 if (!sub_schurs->reuse_solver) { 2970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2972 } else { 2973 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2974 2975 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2976 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2977 } 2978 if (!applytranspose && pcbddc->local_auxmat1) { 2979 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2980 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2981 } 2982 } else { 2983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2985 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 if (!applytranspose && pcbddc->local_auxmat1) { 2988 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2989 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2990 } 2991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2993 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2994 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3002 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3003 { 3004 PetscErrorCode ierr; 3005 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3006 PC_IS* pcis = (PC_IS*) (pc->data); 3007 const PetscScalar zero = 0.0; 3008 3009 PetscFunctionBegin; 3010 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3011 if (!pcbddc->benign_apply_coarse_only) { 3012 if (applytranspose) { 3013 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3014 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3015 } else { 3016 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3017 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3018 } 3019 } else { 3020 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3021 } 3022 3023 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3024 if (pcbddc->benign_n) { 3025 PetscScalar *array; 3026 PetscInt j; 3027 3028 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3029 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3030 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3031 } 3032 3033 /* start communications from local primal nodes to rhs of coarse solver */ 3034 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3035 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3036 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 3038 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3039 /* TODO remove null space when doing multilevel */ 3040 if (pcbddc->coarse_ksp) { 3041 Mat coarse_mat; 3042 Vec rhs,sol; 3043 MatNullSpace nullsp; 3044 3045 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3046 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3047 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3048 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3049 if (nullsp) { 3050 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3051 } 3052 if (applytranspose) { 3053 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3054 } else { 3055 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3056 } 3057 if (nullsp) { 3058 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3059 } 3060 } 3061 3062 /* Local solution on R nodes */ 3063 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3064 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3065 } 3066 /* communications from coarse sol to local primal nodes */ 3067 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3068 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 3070 /* Sum contributions from the two levels */ 3071 if (!pcbddc->benign_apply_coarse_only) { 3072 if (applytranspose) { 3073 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3074 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3075 } else { 3076 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3077 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3078 } 3079 /* store p0 */ 3080 if (pcbddc->benign_n) { 3081 PetscScalar *array; 3082 PetscInt j; 3083 3084 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3085 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3086 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3087 } 3088 } else { /* expand the coarse solution */ 3089 if (applytranspose) { 3090 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3091 } else { 3092 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3093 } 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3100 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3101 { 3102 PetscErrorCode ierr; 3103 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3104 PetscScalar *array; 3105 Vec from,to; 3106 3107 PetscFunctionBegin; 3108 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3109 from = pcbddc->coarse_vec; 3110 to = pcbddc->vec1_P; 3111 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3112 Vec tvec; 3113 3114 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3115 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3116 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3117 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3118 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3120 } 3121 } else { /* from local to global -> put data in coarse right hand side */ 3122 from = pcbddc->vec1_P; 3123 to = pcbddc->coarse_vec; 3124 } 3125 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3131 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3132 { 3133 PetscErrorCode ierr; 3134 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3135 PetscScalar *array; 3136 Vec from,to; 3137 3138 PetscFunctionBegin; 3139 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3140 from = pcbddc->coarse_vec; 3141 to = pcbddc->vec1_P; 3142 } else { /* from local to global -> put data in coarse right hand side */ 3143 from = pcbddc->vec1_P; 3144 to = pcbddc->coarse_vec; 3145 } 3146 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3147 if (smode == SCATTER_FORWARD) { 3148 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3149 Vec tvec; 3150 3151 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3152 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3153 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3154 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3155 } 3156 } else { 3157 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3158 ierr = VecResetArray(from);CHKERRQ(ierr); 3159 } 3160 } 3161 PetscFunctionReturn(0); 3162 } 3163 3164 /* uncomment for testing purposes */ 3165 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3168 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3169 { 3170 PetscErrorCode ierr; 3171 PC_IS* pcis = (PC_IS*)(pc->data); 3172 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3173 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3174 /* one and zero */ 3175 PetscScalar one=1.0,zero=0.0; 3176 /* space to store constraints and their local indices */ 3177 PetscScalar *constraints_data; 3178 PetscInt *constraints_idxs,*constraints_idxs_B; 3179 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3180 PetscInt *constraints_n; 3181 /* iterators */ 3182 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3183 /* BLAS integers */ 3184 PetscBLASInt lwork,lierr; 3185 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3186 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3187 /* reuse */ 3188 PetscInt olocal_primal_size,olocal_primal_size_cc; 3189 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3190 /* change of basis */ 3191 PetscBool qr_needed; 3192 PetscBT change_basis,qr_needed_idx; 3193 /* auxiliary stuff */ 3194 PetscInt *nnz,*is_indices; 3195 PetscInt ncc; 3196 /* some quantities */ 3197 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3198 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3199 3200 PetscFunctionBegin; 3201 /* Destroy Mat objects computed previously */ 3202 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3203 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3204 /* save info on constraints from previous setup (if any) */ 3205 olocal_primal_size = pcbddc->local_primal_size; 3206 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3207 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3208 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3209 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3210 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3211 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3212 3213 /* print some info */ 3214 if (pcbddc->dbg_flag) { 3215 IS vertices; 3216 PetscInt nv,nedges,nfaces; 3217 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3218 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3219 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3220 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3226 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 } 3228 3229 if (!pcbddc->adaptive_selection) { 3230 IS ISForVertices,*ISForFaces,*ISForEdges; 3231 MatNullSpace nearnullsp; 3232 const Vec *nearnullvecs; 3233 Vec *localnearnullsp; 3234 PetscScalar *array; 3235 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3236 PetscBool nnsp_has_cnst; 3237 /* LAPACK working arrays for SVD or POD */ 3238 PetscBool skip_lapack,boolforchange; 3239 PetscScalar *work; 3240 PetscReal *singular_vals; 3241 #if defined(PETSC_USE_COMPLEX) 3242 PetscReal *rwork; 3243 #endif 3244 #if defined(PETSC_MISSING_LAPACK_GESVD) 3245 PetscScalar *temp_basis,*correlation_mat; 3246 #else 3247 PetscBLASInt dummy_int=1; 3248 PetscScalar dummy_scalar=1.; 3249 #endif 3250 3251 /* Get index sets for faces, edges and vertices from graph */ 3252 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3253 /* free unneeded index sets */ 3254 if (!pcbddc->use_vertices) { 3255 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3256 } 3257 if (!pcbddc->use_edges) { 3258 for (i=0;i<n_ISForEdges;i++) { 3259 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3260 } 3261 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3262 n_ISForEdges = 0; 3263 } 3264 if (!pcbddc->use_faces) { 3265 for (i=0;i<n_ISForFaces;i++) { 3266 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3267 } 3268 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3269 n_ISForFaces = 0; 3270 } 3271 3272 #if defined(PETSC_USE_DEBUG) 3273 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3274 Also use_change_of_basis should be consistent among processors */ 3275 if (pcbddc->NullSpace) { 3276 PetscBool tbool[2],gbool[2]; 3277 3278 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3279 pcbddc->use_change_of_basis = PETSC_TRUE; 3280 if (!ISForEdges) { 3281 pcbddc->use_change_on_faces = PETSC_TRUE; 3282 } 3283 } 3284 tbool[0] = pcbddc->use_change_of_basis; 3285 tbool[1] = pcbddc->use_change_on_faces; 3286 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3287 pcbddc->use_change_of_basis = gbool[0]; 3288 pcbddc->use_change_on_faces = gbool[1]; 3289 } 3290 #endif 3291 3292 /* check if near null space is attached to global mat */ 3293 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3294 if (nearnullsp) { 3295 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3296 /* remove any stored info */ 3297 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3298 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3299 /* store information for BDDC solver reuse */ 3300 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3301 pcbddc->onearnullspace = nearnullsp; 3302 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3303 for (i=0;i<nnsp_size;i++) { 3304 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3305 } 3306 } else { /* if near null space is not provided BDDC uses constants by default */ 3307 nnsp_size = 0; 3308 nnsp_has_cnst = PETSC_TRUE; 3309 } 3310 /* get max number of constraints on a single cc */ 3311 max_constraints = nnsp_size; 3312 if (nnsp_has_cnst) max_constraints++; 3313 3314 /* 3315 Evaluate maximum storage size needed by the procedure 3316 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3317 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3318 There can be multiple constraints per connected component 3319 */ 3320 n_vertices = 0; 3321 if (ISForVertices) { 3322 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3323 } 3324 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3325 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3326 3327 total_counts = n_ISForFaces+n_ISForEdges; 3328 total_counts *= max_constraints; 3329 total_counts += n_vertices; 3330 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3331 3332 total_counts = 0; 3333 max_size_of_constraint = 0; 3334 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3335 IS used_is; 3336 if (i<n_ISForEdges) { 3337 used_is = ISForEdges[i]; 3338 } else { 3339 used_is = ISForFaces[i-n_ISForEdges]; 3340 } 3341 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3342 total_counts += j; 3343 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3344 } 3345 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3346 3347 /* get local part of global near null space vectors */ 3348 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3349 for (k=0;k<nnsp_size;k++) { 3350 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3351 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3352 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3353 } 3354 3355 /* whether or not to skip lapack calls */ 3356 skip_lapack = PETSC_TRUE; 3357 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3358 3359 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3360 if (!skip_lapack) { 3361 PetscScalar temp_work; 3362 3363 #if defined(PETSC_MISSING_LAPACK_GESVD) 3364 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3365 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3366 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3368 #if defined(PETSC_USE_COMPLEX) 3369 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3370 #endif 3371 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3372 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3373 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3374 lwork = -1; 3375 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3376 #if !defined(PETSC_USE_COMPLEX) 3377 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3378 #else 3379 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3380 #endif 3381 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3382 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3383 #else /* on missing GESVD */ 3384 /* SVD */ 3385 PetscInt max_n,min_n; 3386 max_n = max_size_of_constraint; 3387 min_n = max_constraints; 3388 if (max_size_of_constraint < max_constraints) { 3389 min_n = max_size_of_constraint; 3390 max_n = max_constraints; 3391 } 3392 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3393 #if defined(PETSC_USE_COMPLEX) 3394 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3395 #endif 3396 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3397 lwork = -1; 3398 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3399 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3400 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3401 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3402 #if !defined(PETSC_USE_COMPLEX) 3403 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3404 #else 3405 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3406 #endif 3407 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3409 #endif /* on missing GESVD */ 3410 /* Allocate optimal workspace */ 3411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3412 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3413 } 3414 /* Now we can loop on constraining sets */ 3415 total_counts = 0; 3416 constraints_idxs_ptr[0] = 0; 3417 constraints_data_ptr[0] = 0; 3418 /* vertices */ 3419 if (n_vertices) { 3420 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3421 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3422 for (i=0;i<n_vertices;i++) { 3423 constraints_n[total_counts] = 1; 3424 constraints_data[total_counts] = 1.0; 3425 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3426 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3427 total_counts++; 3428 } 3429 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3430 n_vertices = total_counts; 3431 } 3432 3433 /* edges and faces */ 3434 total_counts_cc = total_counts; 3435 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3436 IS used_is; 3437 PetscBool idxs_copied = PETSC_FALSE; 3438 3439 if (ncc<n_ISForEdges) { 3440 used_is = ISForEdges[ncc]; 3441 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3442 } else { 3443 used_is = ISForFaces[ncc-n_ISForEdges]; 3444 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3445 } 3446 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3447 3448 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3449 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3450 /* change of basis should not be performed on local periodic nodes */ 3451 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3452 if (nnsp_has_cnst) { 3453 PetscScalar quad_value; 3454 3455 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3456 idxs_copied = PETSC_TRUE; 3457 3458 if (!pcbddc->use_nnsp_true) { 3459 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3460 } else { 3461 quad_value = 1.0; 3462 } 3463 for (j=0;j<size_of_constraint;j++) { 3464 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3465 } 3466 temp_constraints++; 3467 total_counts++; 3468 } 3469 for (k=0;k<nnsp_size;k++) { 3470 PetscReal real_value; 3471 PetscScalar *ptr_to_data; 3472 3473 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3474 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3475 for (j=0;j<size_of_constraint;j++) { 3476 ptr_to_data[j] = array[is_indices[j]]; 3477 } 3478 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3479 /* check if array is null on the connected component */ 3480 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3481 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3482 if (real_value > 0.0) { /* keep indices and values */ 3483 temp_constraints++; 3484 total_counts++; 3485 if (!idxs_copied) { 3486 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3487 idxs_copied = PETSC_TRUE; 3488 } 3489 } 3490 } 3491 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3492 valid_constraints = temp_constraints; 3493 if (!pcbddc->use_nnsp_true && temp_constraints) { 3494 if (temp_constraints == 1) { /* just normalize the constraint */ 3495 PetscScalar norm,*ptr_to_data; 3496 3497 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3499 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3500 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3501 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3502 } else { /* perform SVD */ 3503 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3504 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3505 3506 #if defined(PETSC_MISSING_LAPACK_GESVD) 3507 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3508 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3509 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3510 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3511 from that computed using LAPACKgesvd 3512 -> This is due to a different computation of eigenvectors in LAPACKheev 3513 -> The quality of the POD-computed basis will be the same */ 3514 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3515 /* Store upper triangular part of correlation matrix */ 3516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3518 for (j=0;j<temp_constraints;j++) { 3519 for (k=0;k<j+1;k++) { 3520 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3521 } 3522 } 3523 /* compute eigenvalues and eigenvectors of correlation matrix */ 3524 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3525 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3526 #if !defined(PETSC_USE_COMPLEX) 3527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3528 #else 3529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3530 #endif 3531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3533 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3534 j = 0; 3535 while (j < temp_constraints && singular_vals[j] < tol) j++; 3536 total_counts = total_counts-j; 3537 valid_constraints = temp_constraints-j; 3538 /* scale and copy POD basis into used quadrature memory */ 3539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3540 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3541 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3543 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3545 if (j<temp_constraints) { 3546 PetscInt ii; 3547 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3548 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3549 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3551 for (k=0;k<temp_constraints-j;k++) { 3552 for (ii=0;ii<size_of_constraint;ii++) { 3553 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3554 } 3555 } 3556 } 3557 #else /* on missing GESVD */ 3558 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3559 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3561 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3562 #if !defined(PETSC_USE_COMPLEX) 3563 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3564 #else 3565 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3566 #endif 3567 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3569 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3570 k = temp_constraints; 3571 if (k > size_of_constraint) k = size_of_constraint; 3572 j = 0; 3573 while (j < k && singular_vals[k-j-1] < tol) j++; 3574 valid_constraints = k-j; 3575 total_counts = total_counts-temp_constraints+valid_constraints; 3576 #endif /* on missing GESVD */ 3577 } 3578 } 3579 /* update pointers information */ 3580 if (valid_constraints) { 3581 constraints_n[total_counts_cc] = valid_constraints; 3582 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3583 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3584 /* set change_of_basis flag */ 3585 if (boolforchange) { 3586 PetscBTSet(change_basis,total_counts_cc); 3587 } 3588 total_counts_cc++; 3589 } 3590 } 3591 /* free workspace */ 3592 if (!skip_lapack) { 3593 ierr = PetscFree(work);CHKERRQ(ierr); 3594 #if defined(PETSC_USE_COMPLEX) 3595 ierr = PetscFree(rwork);CHKERRQ(ierr); 3596 #endif 3597 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3598 #if defined(PETSC_MISSING_LAPACK_GESVD) 3599 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3600 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3601 #endif 3602 } 3603 for (k=0;k<nnsp_size;k++) { 3604 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3605 } 3606 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3607 /* free index sets of faces, edges and vertices */ 3608 for (i=0;i<n_ISForFaces;i++) { 3609 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3610 } 3611 if (n_ISForFaces) { 3612 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3613 } 3614 for (i=0;i<n_ISForEdges;i++) { 3615 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3616 } 3617 if (n_ISForEdges) { 3618 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3619 } 3620 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3621 } else { 3622 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3623 3624 total_counts = 0; 3625 n_vertices = 0; 3626 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3627 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3628 } 3629 max_constraints = 0; 3630 total_counts_cc = 0; 3631 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3632 total_counts += pcbddc->adaptive_constraints_n[i]; 3633 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3634 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3635 } 3636 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3637 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3638 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3639 constraints_data = pcbddc->adaptive_constraints_data; 3640 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3641 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3642 total_counts_cc = 0; 3643 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3644 if (pcbddc->adaptive_constraints_n[i]) { 3645 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3646 } 3647 } 3648 #if 0 3649 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3650 for (i=0;i<total_counts_cc;i++) { 3651 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3652 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3653 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3654 printf(" %d",constraints_idxs[j]); 3655 } 3656 printf("\n"); 3657 printf("number of cc: %d\n",constraints_n[i]); 3658 } 3659 for (i=0;i<n_vertices;i++) { 3660 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3661 } 3662 for (i=0;i<sub_schurs->n_subs;i++) { 3663 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3664 } 3665 #endif 3666 3667 max_size_of_constraint = 0; 3668 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3669 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3670 /* Change of basis */ 3671 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3672 if (pcbddc->use_change_of_basis) { 3673 for (i=0;i<sub_schurs->n_subs;i++) { 3674 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3675 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3676 } 3677 } 3678 } 3679 } 3680 pcbddc->local_primal_size = total_counts; 3681 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3682 3683 /* map constraints_idxs in boundary numbering */ 3684 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3685 if (i != constraints_idxs_ptr[total_counts_cc]) { 3686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3687 } 3688 3689 /* Create constraint matrix */ 3690 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3691 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3692 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3693 3694 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3695 /* determine if a QR strategy is needed for change of basis */ 3696 qr_needed = PETSC_FALSE; 3697 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3698 total_primal_vertices=0; 3699 pcbddc->local_primal_size_cc = 0; 3700 for (i=0;i<total_counts_cc;i++) { 3701 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3702 if (size_of_constraint == 1) { 3703 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3704 pcbddc->local_primal_size_cc += 1; 3705 } else if (PetscBTLookup(change_basis,i)) { 3706 for (k=0;k<constraints_n[i];k++) { 3707 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3708 } 3709 pcbddc->local_primal_size_cc += constraints_n[i]; 3710 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3711 PetscBTSet(qr_needed_idx,i); 3712 qr_needed = PETSC_TRUE; 3713 } 3714 } else { 3715 pcbddc->local_primal_size_cc += 1; 3716 } 3717 } 3718 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3719 pcbddc->n_vertices = total_primal_vertices; 3720 /* permute indices in order to have a sorted set of vertices */ 3721 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3722 3723 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3724 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3725 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3726 3727 /* nonzero structure of constraint matrix */ 3728 /* and get reference dof for local constraints */ 3729 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3730 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3731 3732 j = total_primal_vertices; 3733 total_counts = total_primal_vertices; 3734 cum = total_primal_vertices; 3735 for (i=n_vertices;i<total_counts_cc;i++) { 3736 if (!PetscBTLookup(change_basis,i)) { 3737 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3738 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3739 cum++; 3740 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3741 for (k=0;k<constraints_n[i];k++) { 3742 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3743 nnz[j+k] = size_of_constraint; 3744 } 3745 j += constraints_n[i]; 3746 } 3747 } 3748 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3749 ierr = PetscFree(nnz);CHKERRQ(ierr); 3750 3751 /* set values in constraint matrix */ 3752 for (i=0;i<total_primal_vertices;i++) { 3753 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3754 } 3755 total_counts = total_primal_vertices; 3756 for (i=n_vertices;i<total_counts_cc;i++) { 3757 if (!PetscBTLookup(change_basis,i)) { 3758 PetscInt *cols; 3759 3760 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3761 cols = constraints_idxs+constraints_idxs_ptr[i]; 3762 for (k=0;k<constraints_n[i];k++) { 3763 PetscInt row = total_counts+k; 3764 PetscScalar *vals; 3765 3766 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3767 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3768 } 3769 total_counts += constraints_n[i]; 3770 } 3771 } 3772 /* assembling */ 3773 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3774 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 3776 /* 3777 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3778 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3779 */ 3780 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3781 if (pcbddc->use_change_of_basis) { 3782 /* dual and primal dofs on a single cc */ 3783 PetscInt dual_dofs,primal_dofs; 3784 /* working stuff for GEQRF */ 3785 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3786 PetscBLASInt lqr_work; 3787 /* working stuff for UNGQR */ 3788 PetscScalar *gqr_work,lgqr_work_t; 3789 PetscBLASInt lgqr_work; 3790 /* working stuff for TRTRS */ 3791 PetscScalar *trs_rhs; 3792 PetscBLASInt Blas_NRHS; 3793 /* pointers for values insertion into change of basis matrix */ 3794 PetscInt *start_rows,*start_cols; 3795 PetscScalar *start_vals; 3796 /* working stuff for values insertion */ 3797 PetscBT is_primal; 3798 PetscInt *aux_primal_numbering_B; 3799 /* matrix sizes */ 3800 PetscInt global_size,local_size; 3801 /* temporary change of basis */ 3802 Mat localChangeOfBasisMatrix; 3803 /* extra space for debugging */ 3804 PetscScalar *dbg_work; 3805 3806 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3807 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3808 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3809 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3810 /* nonzeros for local mat */ 3811 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3812 for (i=0;i<pcis->n;i++) nnz[i]=1; 3813 for (i=n_vertices;i<total_counts_cc;i++) { 3814 if (PetscBTLookup(change_basis,i)) { 3815 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3816 if (PetscBTLookup(qr_needed_idx,i)) { 3817 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3818 } else { 3819 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3820 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3821 } 3822 } 3823 } 3824 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3825 ierr = PetscFree(nnz);CHKERRQ(ierr); 3826 /* Set initial identity in the matrix */ 3827 for (i=0;i<pcis->n;i++) { 3828 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3829 } 3830 3831 if (pcbddc->dbg_flag) { 3832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3834 } 3835 3836 3837 /* Now we loop on the constraints which need a change of basis */ 3838 /* 3839 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3840 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3841 3842 Basic blocks of change of basis matrix T computed by 3843 3844 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3845 3846 | 1 0 ... 0 s_1/S | 3847 | 0 1 ... 0 s_2/S | 3848 | ... | 3849 | 0 ... 1 s_{n-1}/S | 3850 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3851 3852 with S = \sum_{i=1}^n s_i^2 3853 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3854 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3855 3856 - QR decomposition of constraints otherwise 3857 */ 3858 if (qr_needed) { 3859 /* space to store Q */ 3860 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3861 /* first we issue queries for optimal work */ 3862 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3863 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3864 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3865 lqr_work = -1; 3866 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3867 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3868 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3869 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3870 lgqr_work = -1; 3871 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3872 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3873 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3874 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3875 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3876 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3877 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3878 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3879 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3880 /* array to store scaling factors for reflectors */ 3881 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3882 /* array to store rhs and solution of triangular solver */ 3883 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3884 /* allocating workspace for check */ 3885 if (pcbddc->dbg_flag) { 3886 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3887 } 3888 } 3889 /* array to store whether a node is primal or not */ 3890 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3891 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3892 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3893 if (i != total_primal_vertices) { 3894 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3895 } 3896 for (i=0;i<total_primal_vertices;i++) { 3897 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3898 } 3899 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3900 3901 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3902 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3903 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3904 if (PetscBTLookup(change_basis,total_counts)) { 3905 /* get constraint info */ 3906 primal_dofs = constraints_n[total_counts]; 3907 dual_dofs = size_of_constraint-primal_dofs; 3908 3909 if (pcbddc->dbg_flag) { 3910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3911 } 3912 3913 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3914 3915 /* copy quadrature constraints for change of basis check */ 3916 if (pcbddc->dbg_flag) { 3917 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3918 } 3919 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3920 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3921 3922 /* compute QR decomposition of constraints */ 3923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3924 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3926 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3927 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3930 3931 /* explictly compute R^-T */ 3932 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3933 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3934 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3935 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3937 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3938 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3939 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3940 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3942 3943 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3949 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3951 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3952 3953 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3954 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3955 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3960 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3962 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3963 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3964 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3965 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3966 3967 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3968 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3969 /* insert cols for primal dofs */ 3970 for (j=0;j<primal_dofs;j++) { 3971 start_vals = &qr_basis[j*size_of_constraint]; 3972 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3973 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3974 } 3975 /* insert cols for dual dofs */ 3976 for (j=0,k=0;j<dual_dofs;k++) { 3977 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3978 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3979 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3980 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3981 j++; 3982 } 3983 } 3984 3985 /* check change of basis */ 3986 if (pcbddc->dbg_flag) { 3987 PetscInt ii,jj; 3988 PetscBool valid_qr=PETSC_TRUE; 3989 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3996 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3997 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3998 for (jj=0;jj<size_of_constraint;jj++) { 3999 for (ii=0;ii<primal_dofs;ii++) { 4000 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4001 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4002 } 4003 } 4004 if (!valid_qr) { 4005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4006 for (jj=0;jj<size_of_constraint;jj++) { 4007 for (ii=0;ii<primal_dofs;ii++) { 4008 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4009 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4010 } 4011 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4012 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4013 } 4014 } 4015 } 4016 } else { 4017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4018 } 4019 } 4020 } else { /* simple transformation block */ 4021 PetscInt row,col; 4022 PetscScalar val,norm; 4023 4024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4025 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4026 for (j=0;j<size_of_constraint;j++) { 4027 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4028 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4029 if (!PetscBTLookup(is_primal,row_B)) { 4030 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4031 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4032 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4033 } else { 4034 for (k=0;k<size_of_constraint;k++) { 4035 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4036 if (row != col) { 4037 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4038 } else { 4039 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4040 } 4041 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4042 } 4043 } 4044 } 4045 if (pcbddc->dbg_flag) { 4046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4047 } 4048 } 4049 } else { 4050 if (pcbddc->dbg_flag) { 4051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4052 } 4053 } 4054 } 4055 4056 /* free workspace */ 4057 if (qr_needed) { 4058 if (pcbddc->dbg_flag) { 4059 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4060 } 4061 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4062 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4063 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4064 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4065 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4066 } 4067 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4068 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4069 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4070 4071 /* assembling of global change of variable */ 4072 { 4073 Mat tmat; 4074 PetscInt bs; 4075 4076 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4077 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4078 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4079 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4080 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4081 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4082 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4083 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4084 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4085 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4086 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4087 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4088 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4089 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4090 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4091 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4093 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4094 } 4095 /* check */ 4096 if (pcbddc->dbg_flag) { 4097 PetscReal error; 4098 Vec x,x_change; 4099 4100 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4101 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4102 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4103 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4104 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4105 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4106 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4107 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4108 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4109 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4110 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4111 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4114 ierr = VecDestroy(&x);CHKERRQ(ierr); 4115 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4116 } 4117 4118 /* adapt sub_schurs computed (if any) */ 4119 if (pcbddc->use_deluxe_scaling) { 4120 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4121 if (sub_schurs->S_Ej_all) { 4122 Mat S_new,tmat; 4123 ISLocalToGlobalMapping NtoSall; 4124 IS is_all_N,is_V,is_V_Sall; 4125 const PetscScalar *array; 4126 const PetscInt *idxs_V,*idxs_all; 4127 PetscInt i,n_V; 4128 4129 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4130 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4131 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4132 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4133 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4134 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4135 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4136 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4137 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4138 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4139 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4140 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4141 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4142 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4143 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4144 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4145 for (i=0;i<n_V;i++) { 4146 PetscScalar val; 4147 PetscInt idx; 4148 4149 idx = idxs_V[i]; 4150 val = array[idxs_all[idxs_V[i]]]; 4151 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4152 } 4153 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 sub_schurs->S_Ej_all = S_new; 4156 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4157 if (sub_schurs->sum_S_Ej_all) { 4158 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4159 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4160 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4161 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4162 sub_schurs->sum_S_Ej_all = S_new; 4163 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4164 } 4165 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4166 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4167 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4168 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4169 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4170 } 4171 } 4172 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4173 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4174 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4175 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4176 } 4177 4178 /* set up change of basis context */ 4179 if (pcbddc->ChangeOfBasisMatrix) { 4180 PCBDDCChange_ctx change_ctx; 4181 4182 if (!pcbddc->new_global_mat) { 4183 PetscInt global_size,local_size; 4184 4185 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4186 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4187 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4188 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4189 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4190 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4191 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4192 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4193 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4194 } else { 4195 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4196 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4197 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4198 } 4199 if (!pcbddc->user_ChangeOfBasisMatrix) { 4200 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4201 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4202 } else { 4203 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4204 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4205 } 4206 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4207 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4208 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4209 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4210 } else { 4211 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4212 } 4213 4214 /* add pressure dofs to set of primal nodes for numbering purposes */ 4215 for (i=0;i<pcbddc->benign_n;i++) { 4216 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4217 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4218 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4219 pcbddc->local_primal_size_cc++; 4220 pcbddc->local_primal_size++; 4221 } 4222 4223 /* check if a new primal space has been introduced (also take into account benign trick) */ 4224 pcbddc->new_primal_space_local = PETSC_TRUE; 4225 if (olocal_primal_size == pcbddc->local_primal_size) { 4226 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4227 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4228 if (!pcbddc->new_primal_space_local) { 4229 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4230 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4231 } 4232 } 4233 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4234 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4235 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4236 4237 /* flush dbg viewer */ 4238 if (pcbddc->dbg_flag) { 4239 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4240 } 4241 4242 /* free workspace */ 4243 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4244 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4245 if (!pcbddc->adaptive_selection) { 4246 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4247 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4248 } else { 4249 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4250 pcbddc->adaptive_constraints_idxs_ptr, 4251 pcbddc->adaptive_constraints_data_ptr, 4252 pcbddc->adaptive_constraints_idxs, 4253 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4254 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4255 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4256 } 4257 PetscFunctionReturn(0); 4258 } 4259 4260 #undef __FUNCT__ 4261 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4262 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4263 { 4264 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4265 PC_IS *pcis = (PC_IS*)pc->data; 4266 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4267 PetscInt ierr,i,vertex_size,N; 4268 PetscViewer viewer=pcbddc->dbg_viewer; 4269 4270 PetscFunctionBegin; 4271 /* Reset previously computed graph */ 4272 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4273 /* Init local Graph struct */ 4274 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4275 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4276 4277 /* Check validity of the csr graph passed in by the user */ 4278 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4279 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4280 } 4281 4282 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4283 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4284 PetscInt *xadj,*adjncy; 4285 PetscInt nvtxs; 4286 PetscBool flg_row=PETSC_FALSE; 4287 4288 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4289 if (flg_row) { 4290 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4291 pcbddc->computed_rowadj = PETSC_TRUE; 4292 } 4293 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4294 } 4295 if (pcbddc->dbg_flag) { 4296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4297 } 4298 4299 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4300 vertex_size = 1; 4301 if (pcbddc->user_provided_isfordofs) { 4302 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4303 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4304 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4305 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4306 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4307 } 4308 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4309 pcbddc->n_ISForDofs = 0; 4310 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4311 } 4312 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4313 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4314 } else { 4315 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4316 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4317 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4318 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4319 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4320 } 4321 } 4322 } 4323 4324 /* Setup of Graph */ 4325 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4326 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4327 } 4328 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4329 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4330 } 4331 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4332 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4333 } 4334 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4335 4336 /* attach info on disconnected subdomains if present */ 4337 if (pcbddc->n_local_subs) { 4338 PetscInt *local_subs; 4339 4340 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4341 for (i=0;i<pcbddc->n_local_subs;i++) { 4342 const PetscInt *idxs; 4343 PetscInt nl,j; 4344 4345 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4346 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4347 for (j=0;j<nl;j++) { 4348 local_subs[idxs[j]] = i; 4349 } 4350 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4351 } 4352 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4353 pcbddc->mat_graph->local_subs = local_subs; 4354 } 4355 4356 /* Graph's connected components analysis */ 4357 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4358 4359 /* print some info to stdout */ 4360 if (pcbddc->dbg_flag) { 4361 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4362 } 4363 4364 /* mark topography has done */ 4365 pcbddc->recompute_topography = PETSC_FALSE; 4366 PetscFunctionReturn(0); 4367 } 4368 4369 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4370 #undef __FUNCT__ 4371 #define __FUNCT__ "PCBDDCSubsetNumbering" 4372 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4373 { 4374 PetscSF sf; 4375 PetscLayout map; 4376 const PetscInt *idxs; 4377 PetscInt *leaf_data,*root_data,*gidxs; 4378 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4379 PetscInt n_n,nlocals,start,first_index; 4380 PetscMPIInt commsize; 4381 PetscBool first_found; 4382 PetscErrorCode ierr; 4383 4384 PetscFunctionBegin; 4385 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4386 if (subset_mult) { 4387 PetscCheckSameComm(subset,1,subset_mult,2); 4388 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4389 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4390 } 4391 /* create workspace layout for computing global indices of subset */ 4392 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4393 lbounds[0] = lbounds[1] = 0; 4394 for (i=0;i<n;i++) { 4395 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4396 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4397 } 4398 lbounds[0] = -lbounds[0]; 4399 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4400 gbounds[0] = -gbounds[0]; 4401 N = gbounds[1] - gbounds[0] + 1; 4402 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4403 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4404 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4405 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4406 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4407 4408 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4409 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4410 if (subset_mult) { 4411 const PetscInt* idxs_mult; 4412 4413 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4414 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4415 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4416 } else { 4417 for (i=0;i<n;i++) leaf_data[i] = 1; 4418 } 4419 /* local size of new subset */ 4420 n_n = 0; 4421 for (i=0;i<n;i++) n_n += leaf_data[i]; 4422 4423 /* global indexes in layout */ 4424 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4425 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4426 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4427 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4428 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4429 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4430 4431 /* reduce from leaves to roots */ 4432 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4433 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4434 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4435 4436 /* count indexes in local part of layout */ 4437 nlocals = 0; 4438 first_index = -1; 4439 first_found = PETSC_FALSE; 4440 for (i=0;i<Nl;i++) { 4441 if (!first_found && root_data[i]) { 4442 first_found = PETSC_TRUE; 4443 first_index = i; 4444 } 4445 nlocals += root_data[i]; 4446 } 4447 4448 /* cumulative of number of indexes and size of subset without holes */ 4449 #if defined(PETSC_HAVE_MPI_EXSCAN) 4450 start = 0; 4451 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4452 #else 4453 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4454 start = start-nlocals; 4455 #endif 4456 4457 if (N_n) { /* compute total size of new subset if requested */ 4458 *N_n = start + nlocals; 4459 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4460 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4461 } 4462 4463 /* adapt root data with cumulative */ 4464 if (first_found) { 4465 PetscInt old_index; 4466 4467 root_data[first_index] += start; 4468 old_index = first_index; 4469 for (i=first_index+1;i<Nl;i++) { 4470 if (root_data[i]) { 4471 root_data[i] += root_data[old_index]; 4472 old_index = i; 4473 } 4474 } 4475 } 4476 4477 /* from roots to leaves */ 4478 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4479 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4480 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4481 4482 /* create new IS with global indexes without holes */ 4483 if (subset_mult) { 4484 const PetscInt* idxs_mult; 4485 PetscInt cum; 4486 4487 cum = 0; 4488 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4489 for (i=0;i<n;i++) { 4490 PetscInt j; 4491 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4492 } 4493 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4494 } else { 4495 for (i=0;i<n;i++) { 4496 gidxs[i] = leaf_data[i]-1; 4497 } 4498 } 4499 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4500 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4501 PetscFunctionReturn(0); 4502 } 4503 4504 #undef __FUNCT__ 4505 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4506 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4507 { 4508 PetscInt i,j; 4509 PetscScalar *alphas; 4510 PetscErrorCode ierr; 4511 4512 PetscFunctionBegin; 4513 /* this implements stabilized Gram-Schmidt */ 4514 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4515 for (i=0;i<n;i++) { 4516 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4517 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4518 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4519 } 4520 ierr = PetscFree(alphas);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 #undef __FUNCT__ 4525 #define __FUNCT__ "MatISGetSubassemblingPattern" 4526 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4527 { 4528 IS ranks_send_to; 4529 PetscInt n_neighs,*neighs,*n_shared,**shared; 4530 PetscMPIInt size,rank,color; 4531 PetscInt *xadj,*adjncy; 4532 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4533 PetscInt i,local_size,threshold=0; 4534 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4535 PetscSubcomm subcomm; 4536 PetscErrorCode ierr; 4537 4538 PetscFunctionBegin; 4539 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4540 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4541 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4542 4543 /* Get info on mapping */ 4544 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4545 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4546 4547 /* build local CSR graph of subdomains' connectivity */ 4548 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4549 xadj[0] = 0; 4550 xadj[1] = PetscMax(n_neighs-1,0); 4551 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4552 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4553 4554 if (threshold) { 4555 PetscInt xadj_count = 0; 4556 for (i=1;i<n_neighs;i++) { 4557 if (n_shared[i] > threshold) { 4558 adjncy[xadj_count] = neighs[i]; 4559 adjncy_wgt[xadj_count] = n_shared[i]; 4560 xadj_count++; 4561 } 4562 } 4563 xadj[1] = xadj_count; 4564 } else { 4565 if (xadj[1]) { 4566 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4567 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4568 } 4569 } 4570 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4571 if (use_square) { 4572 for (i=0;i<xadj[1];i++) { 4573 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4574 } 4575 } 4576 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4577 4578 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4579 4580 /* 4581 Restrict work on active processes only. 4582 */ 4583 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4584 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4585 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4586 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4587 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4588 if (color) { 4589 ierr = PetscFree(xadj);CHKERRQ(ierr); 4590 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4591 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4592 } else { 4593 Mat subdomain_adj; 4594 IS new_ranks,new_ranks_contig; 4595 MatPartitioning partitioner; 4596 PetscInt prank,rstart=0,rend=0; 4597 PetscInt *is_indices,*oldranks; 4598 PetscBool aggregate; 4599 4600 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4601 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4602 prank = rank; 4603 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4604 /* 4605 for (i=0;i<size;i++) { 4606 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4607 } 4608 */ 4609 for (i=0;i<xadj[1];i++) { 4610 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4611 } 4612 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4613 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4614 if (aggregate) { 4615 PetscInt lrows,row,ncols,*cols; 4616 PetscMPIInt nrank; 4617 PetscScalar *vals; 4618 4619 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4620 lrows = 0; 4621 if (nrank<redprocs) { 4622 lrows = size/redprocs; 4623 if (nrank<size%redprocs) lrows++; 4624 } 4625 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4626 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4627 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4628 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4629 row = nrank; 4630 ncols = xadj[1]-xadj[0]; 4631 cols = adjncy; 4632 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4633 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4634 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4635 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4636 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4637 ierr = PetscFree(xadj);CHKERRQ(ierr); 4638 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4639 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4640 ierr = PetscFree(vals);CHKERRQ(ierr); 4641 } else { 4642 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4643 } 4644 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4645 4646 /* Partition */ 4647 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4648 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4649 if (use_vwgt) { 4650 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4651 v_wgt[0] = local_size; 4652 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4653 } 4654 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4655 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4656 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4657 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4658 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4659 4660 /* renumber new_ranks to avoid "holes" in new set of processors */ 4661 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4662 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4663 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4664 if (!redprocs) { 4665 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4666 } else { 4667 PetscInt idxs[1]; 4668 PetscMPIInt tag; 4669 MPI_Request *reqs; 4670 4671 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4673 for (i=rstart;i<rend;i++) { 4674 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4675 } 4676 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4677 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4678 ierr = PetscFree(reqs);CHKERRQ(ierr); 4679 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4680 } 4681 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4682 /* clean up */ 4683 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4684 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4685 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4686 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4687 } 4688 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4689 4690 /* assemble parallel IS for sends */ 4691 i = 1; 4692 if (color) i=0; 4693 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4694 /* get back IS */ 4695 *is_sends = ranks_send_to; 4696 PetscFunctionReturn(0); 4697 } 4698 4699 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4700 4701 #undef __FUNCT__ 4702 #define __FUNCT__ "MatISSubassemble" 4703 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4704 { 4705 Mat local_mat; 4706 IS is_sends_internal; 4707 PetscInt rows,cols,new_local_rows; 4708 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4709 PetscBool ismatis,isdense,newisdense,destroy_mat; 4710 ISLocalToGlobalMapping l2gmap; 4711 PetscInt* l2gmap_indices; 4712 const PetscInt* is_indices; 4713 MatType new_local_type; 4714 /* buffers */ 4715 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4716 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4717 PetscInt *recv_buffer_idxs_local; 4718 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4719 /* MPI */ 4720 MPI_Comm comm,comm_n; 4721 PetscSubcomm subcomm; 4722 PetscMPIInt n_sends,n_recvs,commsize; 4723 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4724 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4725 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4726 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4727 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4728 PetscErrorCode ierr; 4729 4730 PetscFunctionBegin; 4731 /* TODO: add missing checks */ 4732 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4733 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4734 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4735 PetscValidLogicalCollectiveInt(mat,nis,7); 4736 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4737 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4738 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4739 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4740 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4741 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4742 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4743 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4744 PetscInt mrows,mcols,mnrows,mncols; 4745 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4746 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4747 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4748 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4749 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4750 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4751 } 4752 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4753 PetscValidLogicalCollectiveInt(mat,bs,0); 4754 /* prepare IS for sending if not provided */ 4755 if (!is_sends) { 4756 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4757 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4758 } else { 4759 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4760 is_sends_internal = is_sends; 4761 } 4762 4763 /* get comm */ 4764 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4765 4766 /* compute number of sends */ 4767 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4768 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4769 4770 /* compute number of receives */ 4771 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4772 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4773 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4774 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4775 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4776 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4777 ierr = PetscFree(iflags);CHKERRQ(ierr); 4778 4779 /* restrict comm if requested */ 4780 subcomm = 0; 4781 destroy_mat = PETSC_FALSE; 4782 if (restrict_comm) { 4783 PetscMPIInt color,subcommsize; 4784 4785 color = 0; 4786 if (restrict_full) { 4787 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4788 } else { 4789 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4790 } 4791 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4792 subcommsize = commsize - subcommsize; 4793 /* check if reuse has been requested */ 4794 if (reuse == MAT_REUSE_MATRIX) { 4795 if (*mat_n) { 4796 PetscMPIInt subcommsize2; 4797 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4798 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4799 comm_n = PetscObjectComm((PetscObject)*mat_n); 4800 } else { 4801 comm_n = PETSC_COMM_SELF; 4802 } 4803 } else { /* MAT_INITIAL_MATRIX */ 4804 PetscMPIInt rank; 4805 4806 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4807 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4808 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4809 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4810 comm_n = PetscSubcommChild(subcomm); 4811 } 4812 /* flag to destroy *mat_n if not significative */ 4813 if (color) destroy_mat = PETSC_TRUE; 4814 } else { 4815 comm_n = comm; 4816 } 4817 4818 /* prepare send/receive buffers */ 4819 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4820 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4821 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4822 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4823 if (nis) { 4824 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4825 } 4826 4827 /* Get data from local matrices */ 4828 if (!isdense) { 4829 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4830 /* TODO: See below some guidelines on how to prepare the local buffers */ 4831 /* 4832 send_buffer_vals should contain the raw values of the local matrix 4833 send_buffer_idxs should contain: 4834 - MatType_PRIVATE type 4835 - PetscInt size_of_l2gmap 4836 - PetscInt global_row_indices[size_of_l2gmap] 4837 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4838 */ 4839 } else { 4840 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4841 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4842 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4843 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4844 send_buffer_idxs[1] = i; 4845 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4846 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4847 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4848 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4849 for (i=0;i<n_sends;i++) { 4850 ilengths_vals[is_indices[i]] = len*len; 4851 ilengths_idxs[is_indices[i]] = len+2; 4852 } 4853 } 4854 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4855 /* additional is (if any) */ 4856 if (nis) { 4857 PetscMPIInt psum; 4858 PetscInt j; 4859 for (j=0,psum=0;j<nis;j++) { 4860 PetscInt plen; 4861 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4862 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4863 psum += len+1; /* indices + lenght */ 4864 } 4865 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4866 for (j=0,psum=0;j<nis;j++) { 4867 PetscInt plen; 4868 const PetscInt *is_array_idxs; 4869 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4870 send_buffer_idxs_is[psum] = plen; 4871 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4872 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4873 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4874 psum += plen+1; /* indices + lenght */ 4875 } 4876 for (i=0;i<n_sends;i++) { 4877 ilengths_idxs_is[is_indices[i]] = psum; 4878 } 4879 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4880 } 4881 4882 buf_size_idxs = 0; 4883 buf_size_vals = 0; 4884 buf_size_idxs_is = 0; 4885 for (i=0;i<n_recvs;i++) { 4886 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4887 buf_size_vals += (PetscInt)olengths_vals[i]; 4888 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4889 } 4890 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4891 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4892 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4893 4894 /* get new tags for clean communications */ 4895 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4896 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4897 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4898 4899 /* allocate for requests */ 4900 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4901 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4902 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4903 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4904 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4905 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4906 4907 /* communications */ 4908 ptr_idxs = recv_buffer_idxs; 4909 ptr_vals = recv_buffer_vals; 4910 ptr_idxs_is = recv_buffer_idxs_is; 4911 for (i=0;i<n_recvs;i++) { 4912 source_dest = onodes[i]; 4913 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4914 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4915 ptr_idxs += olengths_idxs[i]; 4916 ptr_vals += olengths_vals[i]; 4917 if (nis) { 4918 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4919 ptr_idxs_is += olengths_idxs_is[i]; 4920 } 4921 } 4922 for (i=0;i<n_sends;i++) { 4923 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4924 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4925 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4926 if (nis) { 4927 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4928 } 4929 } 4930 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4931 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4932 4933 /* assemble new l2g map */ 4934 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4935 ptr_idxs = recv_buffer_idxs; 4936 new_local_rows = 0; 4937 for (i=0;i<n_recvs;i++) { 4938 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4939 ptr_idxs += olengths_idxs[i]; 4940 } 4941 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4942 ptr_idxs = recv_buffer_idxs; 4943 new_local_rows = 0; 4944 for (i=0;i<n_recvs;i++) { 4945 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4946 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4947 ptr_idxs += olengths_idxs[i]; 4948 } 4949 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4950 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4951 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4952 4953 /* infer new local matrix type from received local matrices type */ 4954 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4955 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4956 if (n_recvs) { 4957 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4958 ptr_idxs = recv_buffer_idxs; 4959 for (i=0;i<n_recvs;i++) { 4960 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4961 new_local_type_private = MATAIJ_PRIVATE; 4962 break; 4963 } 4964 ptr_idxs += olengths_idxs[i]; 4965 } 4966 switch (new_local_type_private) { 4967 case MATDENSE_PRIVATE: 4968 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4969 new_local_type = MATSEQAIJ; 4970 bs = 1; 4971 } else { /* if I receive only 1 dense matrix */ 4972 new_local_type = MATSEQDENSE; 4973 bs = 1; 4974 } 4975 break; 4976 case MATAIJ_PRIVATE: 4977 new_local_type = MATSEQAIJ; 4978 bs = 1; 4979 break; 4980 case MATBAIJ_PRIVATE: 4981 new_local_type = MATSEQBAIJ; 4982 break; 4983 case MATSBAIJ_PRIVATE: 4984 new_local_type = MATSEQSBAIJ; 4985 break; 4986 default: 4987 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4988 break; 4989 } 4990 } else { /* by default, new_local_type is seqdense */ 4991 new_local_type = MATSEQDENSE; 4992 bs = 1; 4993 } 4994 4995 /* create MATIS object if needed */ 4996 if (reuse == MAT_INITIAL_MATRIX) { 4997 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4998 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4999 } else { 5000 /* it also destroys the local matrices */ 5001 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5002 } 5003 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5004 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5005 5006 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5007 5008 /* Global to local map of received indices */ 5009 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5010 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5011 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5012 5013 /* restore attributes -> type of incoming data and its size */ 5014 buf_size_idxs = 0; 5015 for (i=0;i<n_recvs;i++) { 5016 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5017 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5018 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5019 } 5020 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5021 5022 /* set preallocation */ 5023 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5024 if (!newisdense) { 5025 PetscInt *new_local_nnz=0; 5026 5027 ptr_vals = recv_buffer_vals; 5028 ptr_idxs = recv_buffer_idxs_local; 5029 if (n_recvs) { 5030 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5031 } 5032 for (i=0;i<n_recvs;i++) { 5033 PetscInt j; 5034 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5035 for (j=0;j<*(ptr_idxs+1);j++) { 5036 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5037 } 5038 } else { 5039 /* TODO */ 5040 } 5041 ptr_idxs += olengths_idxs[i]; 5042 } 5043 if (new_local_nnz) { 5044 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5045 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5046 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5047 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5048 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5049 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5050 } else { 5051 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5052 } 5053 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5054 } else { 5055 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5056 } 5057 5058 /* set values */ 5059 ptr_vals = recv_buffer_vals; 5060 ptr_idxs = recv_buffer_idxs_local; 5061 for (i=0;i<n_recvs;i++) { 5062 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5063 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5064 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5065 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5066 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5067 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5068 } else { 5069 /* TODO */ 5070 } 5071 ptr_idxs += olengths_idxs[i]; 5072 ptr_vals += olengths_vals[i]; 5073 } 5074 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5075 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5076 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5077 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5078 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5079 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5080 5081 #if 0 5082 if (!restrict_comm) { /* check */ 5083 Vec lvec,rvec; 5084 PetscReal infty_error; 5085 5086 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5087 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5088 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5089 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5090 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5091 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5092 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5093 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5094 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5095 } 5096 #endif 5097 5098 /* assemble new additional is (if any) */ 5099 if (nis) { 5100 PetscInt **temp_idxs,*count_is,j,psum; 5101 5102 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5103 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5104 ptr_idxs = recv_buffer_idxs_is; 5105 psum = 0; 5106 for (i=0;i<n_recvs;i++) { 5107 for (j=0;j<nis;j++) { 5108 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5109 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5110 psum += plen; 5111 ptr_idxs += plen+1; /* shift pointer to received data */ 5112 } 5113 } 5114 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5115 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5116 for (i=1;i<nis;i++) { 5117 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5118 } 5119 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5120 ptr_idxs = recv_buffer_idxs_is; 5121 for (i=0;i<n_recvs;i++) { 5122 for (j=0;j<nis;j++) { 5123 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5124 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5125 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5126 ptr_idxs += plen+1; /* shift pointer to received data */ 5127 } 5128 } 5129 for (i=0;i<nis;i++) { 5130 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5131 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5132 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5133 } 5134 ierr = PetscFree(count_is);CHKERRQ(ierr); 5135 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5136 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5137 } 5138 /* free workspace */ 5139 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5140 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5141 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5142 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5143 if (isdense) { 5144 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5145 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5146 } else { 5147 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5148 } 5149 if (nis) { 5150 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5151 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5152 } 5153 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5154 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5155 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5156 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5157 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5158 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5159 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5160 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5161 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5162 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5163 ierr = PetscFree(onodes);CHKERRQ(ierr); 5164 if (nis) { 5165 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5166 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5167 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5168 } 5169 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5170 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5171 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5172 for (i=0;i<nis;i++) { 5173 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5174 } 5175 *mat_n = NULL; 5176 } 5177 PetscFunctionReturn(0); 5178 } 5179 5180 /* temporary hack into ksp private data structure */ 5181 #include <petsc/private/kspimpl.h> 5182 5183 #undef __FUNCT__ 5184 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5185 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5186 { 5187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5188 PC_IS *pcis = (PC_IS*)pc->data; 5189 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5190 MatNullSpace CoarseNullSpace=NULL; 5191 ISLocalToGlobalMapping coarse_islg; 5192 IS coarse_is,*isarray; 5193 PetscInt i,im_active=-1,active_procs=-1; 5194 PetscInt nis,nisdofs,nisneu,nisvert; 5195 PC pc_temp; 5196 PCType coarse_pc_type; 5197 KSPType coarse_ksp_type; 5198 PetscBool multilevel_requested,multilevel_allowed; 5199 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5200 Mat t_coarse_mat_is; 5201 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5202 PetscMPIInt all_procs; 5203 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5204 PetscBool compute_vecs = PETSC_FALSE; 5205 PetscScalar *array; 5206 PetscErrorCode ierr; 5207 5208 PetscFunctionBegin; 5209 /* Assign global numbering to coarse dofs */ 5210 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5211 PetscInt ocoarse_size; 5212 compute_vecs = PETSC_TRUE; 5213 ocoarse_size = pcbddc->coarse_size; 5214 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5215 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5216 /* see if we can avoid some work */ 5217 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5218 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5219 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5220 PC pc; 5221 PetscBool isbddc; 5222 5223 /* temporary workaround since PCBDDC does not have a reset method so far */ 5224 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5225 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5226 if (isbddc) { 5227 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5228 } else { 5229 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5230 } 5231 coarse_reuse = PETSC_FALSE; 5232 } else { /* we can safely reuse already computed coarse matrix */ 5233 coarse_reuse = PETSC_TRUE; 5234 } 5235 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5236 coarse_reuse = PETSC_FALSE; 5237 } 5238 /* reset any subassembling information */ 5239 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5240 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5241 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5242 coarse_reuse = PETSC_TRUE; 5243 } 5244 5245 /* count "active" (i.e. with positive local size) and "void" processes */ 5246 im_active = !!(pcis->n); 5247 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5248 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5249 void_procs = all_procs-active_procs; 5250 csin_type_simple = PETSC_TRUE; 5251 redist = PETSC_FALSE; 5252 if (pcbddc->current_level && void_procs) { 5253 csin_ml = PETSC_TRUE; 5254 ncoarse_ml = void_procs; 5255 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5256 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5257 csin_ds = PETSC_TRUE; 5258 ncoarse_ds = pcbddc->redistribute_coarse; 5259 redist = PETSC_TRUE; 5260 } else { 5261 csin_ds = PETSC_TRUE; 5262 ncoarse_ds = active_procs; 5263 redist = PETSC_TRUE; 5264 } 5265 } else { 5266 csin_ml = PETSC_FALSE; 5267 ncoarse_ml = all_procs; 5268 if (void_procs) { 5269 csin_ds = PETSC_TRUE; 5270 ncoarse_ds = void_procs; 5271 csin_type_simple = PETSC_FALSE; 5272 } else { 5273 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5274 csin_ds = PETSC_TRUE; 5275 ncoarse_ds = pcbddc->redistribute_coarse; 5276 redist = PETSC_TRUE; 5277 } else { 5278 csin_ds = PETSC_FALSE; 5279 ncoarse_ds = all_procs; 5280 } 5281 } 5282 } 5283 5284 /* 5285 test if we can go multilevel: three conditions must be satisfied: 5286 - we have not exceeded the number of levels requested 5287 - we can actually subassemble the active processes 5288 - we can find a suitable number of MPI processes where we can place the subassembled problem 5289 */ 5290 multilevel_allowed = PETSC_FALSE; 5291 multilevel_requested = PETSC_FALSE; 5292 if (pcbddc->current_level < pcbddc->max_levels) { 5293 multilevel_requested = PETSC_TRUE; 5294 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5295 multilevel_allowed = PETSC_FALSE; 5296 } else { 5297 multilevel_allowed = PETSC_TRUE; 5298 } 5299 } 5300 /* determine number of process partecipating to coarse solver */ 5301 if (multilevel_allowed) { 5302 ncoarse = ncoarse_ml; 5303 csin = csin_ml; 5304 redist = PETSC_FALSE; 5305 } else { 5306 ncoarse = ncoarse_ds; 5307 csin = csin_ds; 5308 } 5309 5310 /* creates temporary l2gmap and IS for coarse indexes */ 5311 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5312 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5313 5314 /* creates temporary MATIS object for coarse matrix */ 5315 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5316 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5317 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5318 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5319 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5320 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5321 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5322 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5323 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5324 5325 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5326 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5327 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5328 const PetscInt *idxs; 5329 ISLocalToGlobalMapping tmap; 5330 5331 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5332 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5333 /* allocate space for temporary storage */ 5334 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5335 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5336 /* allocate for IS array */ 5337 nisdofs = pcbddc->n_ISForDofsLocal; 5338 nisneu = !!pcbddc->NeumannBoundariesLocal; 5339 nisvert = 0; 5340 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5341 nisvert = 1; 5342 } 5343 nis = nisdofs + nisneu + nisvert; 5344 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5345 /* dofs splitting */ 5346 for (i=0;i<nisdofs;i++) { 5347 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5348 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5349 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5350 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5351 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5352 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5353 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5354 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5355 } 5356 /* neumann boundaries */ 5357 if (pcbddc->NeumannBoundariesLocal) { 5358 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5359 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5360 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5361 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5362 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5363 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5364 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5365 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5366 } 5367 /* primal vertices (benign) */ 5368 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5369 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5370 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5371 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5372 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5373 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5374 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5375 } 5376 /* free memory */ 5377 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5378 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5379 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5380 } else { 5381 nis = 0; 5382 nisdofs = 0; 5383 nisneu = 0; 5384 nisvert = 0; 5385 isarray = NULL; 5386 } 5387 /* destroy no longer needed map */ 5388 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5389 5390 /* restrict on coarse candidates (if needed) */ 5391 coarse_mat_is = NULL; 5392 if (csin) { 5393 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5394 if (redist) { 5395 PetscMPIInt rank; 5396 PetscInt spc,n_spc_p1,dest[1],destsize; 5397 5398 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5399 spc = active_procs/ncoarse; 5400 n_spc_p1 = active_procs%ncoarse; 5401 if (im_active) { 5402 destsize = 1; 5403 if (rank > n_spc_p1*(spc+1)-1) { 5404 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5405 } else { 5406 dest[0] = rank/(spc+1); 5407 } 5408 } else { 5409 destsize = 0; 5410 } 5411 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5412 } else if (csin_type_simple) { 5413 PetscMPIInt rank; 5414 PetscInt issize,isidx; 5415 5416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5417 if (im_active) { 5418 issize = 1; 5419 isidx = (PetscInt)rank; 5420 } else { 5421 issize = 0; 5422 isidx = -1; 5423 } 5424 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5425 } else { /* get a suitable subassembling pattern from MATIS code */ 5426 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5427 } 5428 5429 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5430 if (!redist || ncoarse <= void_procs) { 5431 PetscInt ncoarse_cand,tissize,*nisindices; 5432 PetscInt *coarse_candidates; 5433 const PetscInt* tisindices; 5434 5435 /* get coarse candidates' ranks in pc communicator */ 5436 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5437 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5438 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5439 if (!coarse_candidates[i]) { 5440 coarse_candidates[ncoarse_cand++]=i; 5441 } 5442 } 5443 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5444 5445 5446 if (pcbddc->dbg_flag) { 5447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5449 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5451 for (i=0;i<ncoarse_cand;i++) { 5452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5453 } 5454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5456 } 5457 /* shift the pattern on coarse candidates */ 5458 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5459 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5460 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5461 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5462 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5463 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5464 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5465 } 5466 if (pcbddc->dbg_flag) { 5467 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5469 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5471 } 5472 } 5473 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5474 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5475 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5476 } else { /* this is the last level, so use just receiving processes in subcomm */ 5477 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5478 } 5479 } else { 5480 if (pcbddc->dbg_flag) { 5481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5484 } 5485 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5486 coarse_mat_is = t_coarse_mat_is; 5487 } 5488 5489 /* create local to global scatters for coarse problem */ 5490 if (compute_vecs) { 5491 PetscInt lrows; 5492 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5493 if (coarse_mat_is) { 5494 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5495 } else { 5496 lrows = 0; 5497 } 5498 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5499 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5500 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5501 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5502 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5503 } 5504 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5505 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5506 5507 /* set defaults for coarse KSP and PC */ 5508 if (multilevel_allowed) { 5509 coarse_ksp_type = KSPRICHARDSON; 5510 coarse_pc_type = PCBDDC; 5511 } else { 5512 coarse_ksp_type = KSPPREONLY; 5513 coarse_pc_type = PCREDUNDANT; 5514 } 5515 5516 /* print some info if requested */ 5517 if (pcbddc->dbg_flag) { 5518 if (!multilevel_allowed) { 5519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5520 if (multilevel_requested) { 5521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5522 } else if (pcbddc->max_levels) { 5523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5524 } 5525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5526 } 5527 } 5528 5529 /* create the coarse KSP object only once with defaults */ 5530 if (coarse_mat_is) { 5531 MatReuse coarse_mat_reuse; 5532 PetscViewer dbg_viewer = NULL; 5533 if (pcbddc->dbg_flag) { 5534 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5535 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5536 } 5537 if (!pcbddc->coarse_ksp) { 5538 char prefix[256],str_level[16]; 5539 size_t len; 5540 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5541 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5542 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5543 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5544 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5545 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5546 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5547 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5548 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5549 /* prefix */ 5550 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5551 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5552 if (!pcbddc->current_level) { 5553 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5554 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5555 } else { 5556 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5557 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5558 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5559 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5560 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5561 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5562 } 5563 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5564 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5565 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5566 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5567 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5568 /* allow user customization */ 5569 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5570 } 5571 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5572 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5573 if (nisdofs) { 5574 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5575 for (i=0;i<nisdofs;i++) { 5576 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5577 } 5578 } 5579 if (nisneu) { 5580 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5581 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5582 } 5583 if (nisvert) { 5584 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5585 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5586 } 5587 5588 /* get some info after set from options */ 5589 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5590 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5591 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5592 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5593 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5594 isbddc = PETSC_FALSE; 5595 } 5596 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5597 if (isredundant) { 5598 KSP inner_ksp; 5599 PC inner_pc; 5600 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5601 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5602 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5603 } 5604 5605 /* assemble coarse matrix */ 5606 if (coarse_reuse) { 5607 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5608 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5609 coarse_mat_reuse = MAT_REUSE_MATRIX; 5610 } else { 5611 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5612 } 5613 if (isbddc || isnn) { 5614 if (isbddc) { /* currently there's no API for this */ 5615 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5616 pcbddc->detect_disconnected = PETSC_TRUE; 5617 } 5618 if (pcbddc->coarsening_ratio > 1) { 5619 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5620 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5621 if (pcbddc->dbg_flag) { 5622 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5623 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5624 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5625 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5626 } 5627 } 5628 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5629 } else { 5630 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5631 coarse_mat = coarse_mat_is; 5632 } 5633 } else { 5634 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5635 } 5636 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5637 5638 /* propagate symmetry info of coarse matrix */ 5639 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5640 if (pc->pmat->symmetric_set) { 5641 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5642 } 5643 if (pc->pmat->hermitian_set) { 5644 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5645 } 5646 if (pc->pmat->spd_set) { 5647 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5648 } 5649 /* set operators */ 5650 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5651 if (pcbddc->dbg_flag) { 5652 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5653 } 5654 } else { /* processes non partecipating to coarse solver (if any) */ 5655 coarse_mat = 0; 5656 } 5657 ierr = PetscFree(isarray);CHKERRQ(ierr); 5658 #if 0 5659 { 5660 PetscViewer viewer; 5661 char filename[256]; 5662 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5663 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5664 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5665 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5666 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5667 } 5668 #endif 5669 5670 /* Compute coarse null space (special handling by BDDC only) */ 5671 #if 0 5672 if (pcbddc->NullSpace) { 5673 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5674 } 5675 #endif 5676 /* hack */ 5677 if (pcbddc->coarse_ksp) { 5678 Vec crhs,csol; 5679 5680 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5681 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5682 if (!csol) { 5683 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5684 } 5685 if (!crhs) { 5686 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5687 } 5688 } 5689 5690 /* compute null space for coarse solver if the benign trick has been requested */ 5691 if (pcbddc->benign_null) { 5692 5693 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5694 for (i=0;i<pcbddc->benign_n;i++) { 5695 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5696 } 5697 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5698 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5699 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5700 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5701 if (coarse_mat) { 5702 Vec nullv; 5703 PetscScalar *array,*array2; 5704 PetscInt nl; 5705 5706 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5707 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5708 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5709 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5710 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5711 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5712 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5713 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5714 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5715 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5716 } 5717 } 5718 5719 if (pcbddc->coarse_ksp) { 5720 PetscBool ispreonly; 5721 5722 if (CoarseNullSpace) { 5723 PetscBool isnull; 5724 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5725 if (0) { 5726 if (isbddc && !pcbddc->benign_saddle_point) { 5727 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5728 } else { 5729 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5730 } 5731 } else { 5732 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5733 } 5734 } 5735 /* setup coarse ksp */ 5736 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5737 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5738 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5739 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5740 KSP check_ksp; 5741 KSPType check_ksp_type; 5742 PC check_pc; 5743 Vec check_vec,coarse_vec; 5744 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5745 PetscInt its; 5746 PetscBool compute_eigs; 5747 PetscReal *eigs_r,*eigs_c; 5748 PetscInt neigs; 5749 const char *prefix; 5750 5751 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5752 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5753 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5754 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5755 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5756 if (ispreonly) { 5757 check_ksp_type = KSPPREONLY; 5758 compute_eigs = PETSC_FALSE; 5759 } else { 5760 check_ksp_type = KSPGMRES; 5761 compute_eigs = PETSC_TRUE; 5762 } 5763 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5764 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5765 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5766 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5767 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5768 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5769 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5770 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5771 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5772 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5773 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5774 /* create random vec */ 5775 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5776 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5777 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5778 if (CoarseNullSpace) { 5779 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5780 } 5781 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5782 /* solve coarse problem */ 5783 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5784 if (CoarseNullSpace) { 5785 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5786 } 5787 /* set eigenvalue estimation if preonly has not been requested */ 5788 if (compute_eigs) { 5789 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5790 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5791 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5792 lambda_max = eigs_r[neigs-1]; 5793 lambda_min = eigs_r[0]; 5794 if (pcbddc->use_coarse_estimates) { 5795 if (lambda_max>lambda_min) { 5796 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5797 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5798 } 5799 } 5800 } 5801 5802 /* check coarse problem residual error */ 5803 if (pcbddc->dbg_flag) { 5804 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5805 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5806 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5807 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5808 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5809 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5810 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5811 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5812 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5813 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5814 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5815 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5816 if (CoarseNullSpace) { 5817 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5818 } 5819 if (compute_eigs) { 5820 PetscReal lambda_max_s,lambda_min_s; 5821 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5822 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5823 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5824 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5825 for (i=0;i<neigs;i++) { 5826 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5827 } 5828 } 5829 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5830 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5831 } 5832 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5833 if (compute_eigs) { 5834 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5835 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5836 } 5837 } 5838 } 5839 /* print additional info */ 5840 if (pcbddc->dbg_flag) { 5841 /* waits until all processes reaches this point */ 5842 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5843 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5844 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5845 } 5846 5847 /* free memory */ 5848 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5849 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5850 PetscFunctionReturn(0); 5851 } 5852 5853 #undef __FUNCT__ 5854 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5855 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5856 { 5857 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5858 PC_IS* pcis = (PC_IS*)pc->data; 5859 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5860 IS subset,subset_mult,subset_n; 5861 PetscInt local_size,coarse_size=0; 5862 PetscInt *local_primal_indices=NULL; 5863 const PetscInt *t_local_primal_indices; 5864 PetscErrorCode ierr; 5865 5866 PetscFunctionBegin; 5867 /* Compute global number of coarse dofs */ 5868 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5869 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5870 } 5871 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5872 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5873 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5874 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5875 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5876 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5877 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5878 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5879 if (local_size != pcbddc->local_primal_size) { 5880 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5881 } 5882 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5883 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5884 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5885 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5886 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5887 5888 /* check numbering */ 5889 if (pcbddc->dbg_flag) { 5890 PetscScalar coarsesum,*array,*array2; 5891 PetscInt i; 5892 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5893 5894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5896 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5897 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5898 /* counter */ 5899 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5900 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5901 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5902 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5903 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5904 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5905 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5906 for (i=0;i<pcbddc->local_primal_size;i++) { 5907 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5908 } 5909 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5910 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5911 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5912 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5913 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5914 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5915 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5916 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5917 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5918 for (i=0;i<pcis->n;i++) { 5919 if (array[i] != 0.0 && array[i] != array2[i]) { 5920 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5921 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5922 set_error = PETSC_TRUE; 5923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5924 } 5925 } 5926 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5927 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5928 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5929 for (i=0;i<pcis->n;i++) { 5930 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5931 } 5932 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5933 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5934 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5935 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5936 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5938 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5939 PetscInt *gidxs; 5940 5941 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5942 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5946 for (i=0;i<pcbddc->local_primal_size;i++) { 5947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5948 } 5949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5950 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5951 } 5952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5953 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5954 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5955 } 5956 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5957 /* get back data */ 5958 *coarse_size_n = coarse_size; 5959 *local_primal_indices_n = local_primal_indices; 5960 PetscFunctionReturn(0); 5961 } 5962 5963 #undef __FUNCT__ 5964 #define __FUNCT__ "PCBDDCGlobalToLocal" 5965 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5966 { 5967 IS localis_t; 5968 PetscInt i,lsize,*idxs,n; 5969 PetscScalar *vals; 5970 PetscErrorCode ierr; 5971 5972 PetscFunctionBegin; 5973 /* get indices in local ordering exploiting local to global map */ 5974 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5975 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5976 for (i=0;i<lsize;i++) vals[i] = 1.0; 5977 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5978 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5979 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5980 if (idxs) { /* multilevel guard */ 5981 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5982 } 5983 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5984 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5985 ierr = PetscFree(vals);CHKERRQ(ierr); 5986 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5987 /* now compute set in local ordering */ 5988 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5989 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5990 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5991 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5992 for (i=0,lsize=0;i<n;i++) { 5993 if (PetscRealPart(vals[i]) > 0.5) { 5994 lsize++; 5995 } 5996 } 5997 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5998 for (i=0,lsize=0;i<n;i++) { 5999 if (PetscRealPart(vals[i]) > 0.5) { 6000 idxs[lsize++] = i; 6001 } 6002 } 6003 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6004 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6005 *localis = localis_t; 6006 PetscFunctionReturn(0); 6007 } 6008 6009 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6010 #undef __FUNCT__ 6011 #define __FUNCT__ "PCBDDCMatMult_Private" 6012 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6013 { 6014 PCBDDCChange_ctx change_ctx; 6015 PetscErrorCode ierr; 6016 6017 PetscFunctionBegin; 6018 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6019 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6020 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6021 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6022 PetscFunctionReturn(0); 6023 } 6024 6025 #undef __FUNCT__ 6026 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6027 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6028 { 6029 PCBDDCChange_ctx change_ctx; 6030 PetscErrorCode ierr; 6031 6032 PetscFunctionBegin; 6033 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6034 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6035 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6036 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6037 PetscFunctionReturn(0); 6038 } 6039 6040 #undef __FUNCT__ 6041 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6042 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6043 { 6044 PC_IS *pcis=(PC_IS*)pc->data; 6045 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6046 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6047 Mat S_j; 6048 PetscInt *used_xadj,*used_adjncy; 6049 PetscBool free_used_adj; 6050 PetscErrorCode ierr; 6051 6052 PetscFunctionBegin; 6053 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6054 free_used_adj = PETSC_FALSE; 6055 if (pcbddc->sub_schurs_layers == -1) { 6056 used_xadj = NULL; 6057 used_adjncy = NULL; 6058 } else { 6059 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6060 used_xadj = pcbddc->mat_graph->xadj; 6061 used_adjncy = pcbddc->mat_graph->adjncy; 6062 } else if (pcbddc->computed_rowadj) { 6063 used_xadj = pcbddc->mat_graph->xadj; 6064 used_adjncy = pcbddc->mat_graph->adjncy; 6065 } else { 6066 PetscBool flg_row=PETSC_FALSE; 6067 const PetscInt *xadj,*adjncy; 6068 PetscInt nvtxs; 6069 6070 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6071 if (flg_row) { 6072 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6073 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6074 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6075 free_used_adj = PETSC_TRUE; 6076 } else { 6077 pcbddc->sub_schurs_layers = -1; 6078 used_xadj = NULL; 6079 used_adjncy = NULL; 6080 } 6081 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6082 } 6083 } 6084 6085 /* setup sub_schurs data */ 6086 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6087 if (!sub_schurs->schur_explicit) { 6088 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6089 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6090 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6091 } else { 6092 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6093 PetscBool isseqaij; 6094 PetscInt benign_n; 6095 6096 if (!pcbddc->use_vertices && reuse_solvers) { 6097 PetscInt n_vertices; 6098 6099 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6100 reuse_solvers = (PetscBool)!n_vertices; 6101 } 6102 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6103 if (!isseqaij) { 6104 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6105 if (matis->A == pcbddc->local_mat) { 6106 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6107 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6108 } else { 6109 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6110 } 6111 } 6112 if (!pcbddc->benign_change_explicit) { 6113 benign_n = pcbddc->benign_n; 6114 } else { 6115 benign_n = 0; 6116 } 6117 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6118 } 6119 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6120 6121 /* free adjacency */ 6122 if (free_used_adj) { 6123 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6124 } 6125 PetscFunctionReturn(0); 6126 } 6127 6128 #undef __FUNCT__ 6129 #define __FUNCT__ "PCBDDCInitSubSchurs" 6130 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6131 { 6132 PC_IS *pcis=(PC_IS*)pc->data; 6133 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6134 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6135 PCBDDCGraph graph; 6136 PetscErrorCode ierr; 6137 6138 PetscFunctionBegin; 6139 /* attach interface graph for determining subsets */ 6140 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6141 IS verticesIS,verticescomm; 6142 PetscInt vsize,*idxs; 6143 6144 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6145 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6146 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6147 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6148 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6149 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6150 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6151 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6152 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6153 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6154 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6155 /* 6156 if (pcbddc->dbg_flag) { 6157 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6158 } 6159 */ 6160 } else { 6161 graph = pcbddc->mat_graph; 6162 } 6163 6164 /* sub_schurs init */ 6165 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6166 6167 /* free graph struct */ 6168 if (pcbddc->sub_schurs_rebuild) { 6169 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6170 } 6171 PetscFunctionReturn(0); 6172 } 6173 6174 #undef __FUNCT__ 6175 #define __FUNCT__ "PCBDDCCheckOperator" 6176 PetscErrorCode PCBDDCCheckOperator(PC pc) 6177 { 6178 PC_IS *pcis=(PC_IS*)pc->data; 6179 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6180 PetscErrorCode ierr; 6181 6182 PetscFunctionBegin; 6183 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6184 IS zerodiag = NULL; 6185 Mat S_j,B0_B=NULL; 6186 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6187 PetscScalar *p0_check,*array,*array2; 6188 PetscReal norm; 6189 PetscInt i; 6190 6191 /* B0 and B0_B */ 6192 if (zerodiag) { 6193 IS dummy; 6194 6195 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6196 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6197 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6198 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6199 } 6200 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6201 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6202 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6203 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6204 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6205 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6206 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6207 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6208 /* S_j */ 6209 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6210 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6211 6212 /* mimic vector in \widetilde{W}_\Gamma */ 6213 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6214 /* continuous in primal space */ 6215 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6216 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6217 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6218 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6219 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6220 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6221 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6222 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6223 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6224 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6225 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6226 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6227 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6228 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6229 6230 /* assemble rhs for coarse problem */ 6231 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6232 /* local with Schur */ 6233 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6234 if (zerodiag) { 6235 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6236 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6237 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6238 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6239 } 6240 /* sum on primal nodes the local contributions */ 6241 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6242 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6244 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6245 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6246 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6247 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6248 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6249 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6250 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6251 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6252 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6253 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6254 /* scale primal nodes (BDDC sums contibutions) */ 6255 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6256 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6257 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6258 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6259 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6260 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6261 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6262 /* global: \widetilde{B0}_B w_\Gamma */ 6263 if (zerodiag) { 6264 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6265 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6266 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6267 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6268 } 6269 /* BDDC */ 6270 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6271 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6272 6273 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6274 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6275 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6276 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6277 for (i=0;i<pcbddc->benign_n;i++) { 6278 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6279 } 6280 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6281 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6282 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6283 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6284 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6285 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6286 } 6287 PetscFunctionReturn(0); 6288 } 6289