1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 cum2 = cum; 1001 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1002 } 1003 1004 if (mss) { /* multilevel */ 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1006 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1007 } 1008 1009 thresh = pcbddc->adaptive_threshold; 1010 for (i=0;i<sub_schurs->n_subs;i++) { 1011 const PetscInt *idxs; 1012 PetscReal infty = PETSC_MAX_REAL; 1013 PetscInt j,subset_size,eigs_start = 0; 1014 PetscBLASInt B_N; 1015 PetscBool same_data = PETSC_FALSE; 1016 1017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1018 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1019 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1020 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1021 if (sub_schurs->is_hermitian) { 1022 PetscInt j,k; 1023 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1024 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1026 } 1027 for (j=0;j<subset_size;j++) { 1028 for (k=j;k<subset_size;k++) { 1029 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1030 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1031 } 1032 } 1033 } else { 1034 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1036 } 1037 } else { 1038 S = Sarray + cumarray; 1039 St = Starray + cumarray; 1040 } 1041 1042 /* see if we can save some work */ 1043 if (sub_schurs->n_subs == 1) { 1044 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1045 } 1046 1047 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1048 B_neigs = 0; 1049 } else { 1050 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1051 PetscBLASInt B_itype = 1; 1052 PetscBLASInt B_IL, B_IU; 1053 PetscReal eps = -1.0; /* dlamch? */ 1054 PetscInt nmin_s; 1055 1056 if (pcbddc->dbg_flag) { 1057 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1058 } 1059 1060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1061 if (thresh > 1.+PETSC_SMALL) { 1062 1063 /* ask for eigenvalues larger than thresh */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1066 #else 1067 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1068 #endif 1069 } else { 1070 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1071 B_IL = 1; 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1074 #else 1075 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1076 #endif 1077 } 1078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1079 if (B_ierr) { 1080 if (B_ierr < 0 ) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1082 } else if (B_ierr <= B_N) { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1084 } else { 1085 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1086 } 1087 } 1088 1089 if (B_neigs > nmax) { 1090 if (pcbddc->dbg_flag) { 1091 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1092 } 1093 eigs_start = B_neigs -nmax; 1094 B_neigs = nmax; 1095 } 1096 1097 nmin_s = PetscMin(nmin,B_N); 1098 if (B_neigs < nmin_s) { 1099 PetscBLASInt B_neigs2; 1100 1101 B_IU = B_N - B_neigs; 1102 B_IL = B_N - nmin_s + 1; 1103 if (pcbddc->dbg_flag) { 1104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1105 } 1106 if (sub_schurs->is_hermitian) { 1107 PetscInt j; 1108 for (j=0;j<subset_size;j++) { 1109 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1110 } 1111 for (j=0;j<subset_size;j++) { 1112 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1113 } 1114 } else { 1115 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1116 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1117 } 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_COMPLEX) 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1121 #else 1122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1123 #endif 1124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1125 B_neigs += B_neigs2; 1126 } 1127 if (B_ierr) { 1128 if (B_ierr < 0 ) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1130 } else if (B_ierr <= B_N) { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1132 } else { 1133 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 } 1136 if (pcbddc->dbg_flag) { 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1138 for (j=0;j<B_neigs;j++) { 1139 if (eigs[j] == 0.0) { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1141 } else { 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1143 } 1144 } 1145 } 1146 } else { 1147 /* TODO */ 1148 } 1149 } 1150 maxneigs = PetscMax(B_neigs,maxneigs); 1151 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1152 if (B_neigs) { 1153 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1154 1155 if (pcbddc->dbg_flag > 1) { 1156 PetscInt ii; 1157 for (ii=0;ii<B_neigs;ii++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1159 for (j=0;j<B_N;j++) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1162 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1166 #endif 1167 } 1168 } 1169 } 1170 #if 0 1171 for (j=0;j<B_neigs;j++) { 1172 PetscBLASInt Blas_N,Blas_one = 1.0; 1173 PetscScalar norm; 1174 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1175 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1176 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1177 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1178 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1179 } else { 1180 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1181 } 1182 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 } 1184 #endif 1185 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1188 cum++; 1189 } 1190 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1191 /* shift for next computation */ 1192 cumarray += subset_size*subset_size; 1193 } 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1196 } 1197 1198 if (mss) { 1199 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1200 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1201 /* destroy matrices (junk) */ 1202 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1203 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1204 } 1205 if (allocated_S_St) { 1206 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1207 } 1208 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = PetscFree(rwork);CHKERRQ(ierr); 1211 #endif 1212 if (pcbddc->dbg_flag) { 1213 PetscInt maxneigs_r; 1214 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCSetUpSolvers" 1222 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1223 { 1224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1225 PetscScalar *coarse_submat_vals; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1230 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1231 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1232 1233 /* Setup local neumann solver ksp_R */ 1234 /* PCBDDCSetUpLocalScatters should be called first! */ 1235 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1236 1237 /* Change global null space passed in by the user if change of basis has been requested */ 1238 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1239 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1240 } 1241 1242 /* 1243 Setup local correction and local part of coarse basis. 1244 Gives back the dense local part of the coarse matrix in column major ordering 1245 */ 1246 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1247 1248 /* Compute total number of coarse nodes and setup coarse solver */ 1249 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* free */ 1252 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCResetCustomization" 1258 PetscErrorCode PCBDDCResetCustomization(PC pc) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1265 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1266 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1267 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1270 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1271 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1272 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1275 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "PCBDDCResetTopography" 1281 PetscErrorCode PCBDDCResetTopography(PC pc) 1282 { 1283 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1284 PetscInt i; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1291 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1292 for (i=0;i<pcbddc->n_local_subs;i++) { 1293 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1296 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCResetSolvers" 1302 PetscErrorCode PCBDDCResetSolvers(PC pc) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1309 if (pcbddc->coarse_phi_B) { 1310 PetscScalar *array; 1311 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1312 ierr = PetscFree(array);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1316 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1318 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1319 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1328 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1329 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1330 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1332 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1333 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1335 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1336 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1338 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1341 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1342 if (pcbddc->benign_zerodiag_subs) { 1343 PetscInt i; 1344 for (i=0;i<pcbddc->benign_n;i++) { 1345 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1355 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1356 { 1357 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1358 PC_IS *pcis = (PC_IS*)pc->data; 1359 VecType impVecType; 1360 PetscInt n_constraints,n_R,old_size; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (!pcbddc->ConstraintMatrix) { 1365 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1366 } 1367 /* get sizes */ 1368 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1369 n_R = pcis->n - pcbddc->n_vertices; 1370 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1371 /* local work vectors (try to avoid unneeded work)*/ 1372 /* R nodes */ 1373 old_size = -1; 1374 if (pcbddc->vec1_R) { 1375 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1376 } 1377 if (n_R != old_size) { 1378 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1379 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1380 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1381 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1382 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1383 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1384 } 1385 /* local primal dofs */ 1386 old_size = -1; 1387 if (pcbddc->vec1_P) { 1388 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1389 } 1390 if (pcbddc->local_primal_size != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1395 } 1396 /* local explicit constraints */ 1397 old_size = -1; 1398 if (pcbddc->vec1_C) { 1399 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1400 } 1401 if (n_constraints && n_constraints != old_size) { 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1404 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1405 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpCorrection" 1412 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1413 { 1414 PetscErrorCode ierr; 1415 /* pointers to pcis and pcbddc */ 1416 PC_IS* pcis = (PC_IS*)pc->data; 1417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1418 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1419 /* submatrices of local problem */ 1420 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1421 /* submatrices of benign trick */ 1422 Mat B0_V = NULL; 1423 /* submatrices of local coarse problem */ 1424 Mat S_VV,S_CV,S_VC,S_CC; 1425 /* working matrices */ 1426 Mat C_CR; 1427 /* additional working stuff */ 1428 PC pc_R; 1429 Mat F; 1430 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1431 1432 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1433 PetscScalar *work; 1434 PetscInt *idx_V_B; 1435 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1436 PetscInt i,n_R,n_D,n_B; 1437 1438 /* some shortcuts to scalars */ 1439 PetscScalar one=1.0,m_one=-1.0; 1440 1441 PetscFunctionBegin; 1442 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1444 } 1445 1446 /* Set Non-overlapping dimensions */ 1447 n_vertices = pcbddc->n_vertices; 1448 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1449 n_B = pcis->n_B; 1450 n_D = pcis->n - n_B; 1451 n_R = pcis->n - n_vertices; 1452 1453 /* vertices in boundary numbering */ 1454 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1456 if (i != n_vertices) { 1457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1458 } 1459 1460 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1461 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1463 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1465 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1467 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 1471 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1476 lda_rhs = n_R; 1477 need_benign_correction = PETSC_FALSE; 1478 if (isLU || isILU || isCHOL) { 1479 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1480 } else if (sub_schurs->reuse_solver) { 1481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1482 MatFactorType type; 1483 1484 F = reuse_solver->F; 1485 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1486 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1487 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1488 need_benign_correction = !!reuse_solver->benign_n; 1489 } else { 1490 F = NULL; 1491 } 1492 1493 /* allocate workspace */ 1494 n = 0; 1495 if (n_constraints) { 1496 n += lda_rhs*n_constraints; 1497 } 1498 if (n_vertices) { 1499 n = PetscMax(2*lda_rhs*n_vertices,n); 1500 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1501 } 1502 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1503 1504 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1505 if (n_constraints) { 1506 Mat M1,M2,M3; 1507 Mat auxmat; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PetscScalar *marr; 1540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1541 1542 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1543 for (i=0;i<n_constraints;i++) { 1544 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1545 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1549 } 1550 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1551 if (need_benign_correction) { 1552 PetscScalar *marr; 1553 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1554 1555 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 for (i=0;i<n_constraints;i++) { 1557 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1558 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 } 1563 ierr = MatDestroy(&B);CHKERRQ(ierr); 1564 } else { 1565 PetscScalar *marr; 1566 1567 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 for (i=0;i<n_constraints;i++) { 1569 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1574 } 1575 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->switch_static) { 1578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1581 for (i=0;i<n_constraints;i++) { 1582 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1583 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1588 } 1589 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1591 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1592 } else { 1593 if (lda_rhs != n_R) { 1594 IS dummy; 1595 1596 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1601 pcbddc->local_auxmat2 = local_auxmat2_R; 1602 } 1603 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1604 } 1605 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1606 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1607 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1608 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1609 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1610 if (isCHOL) { 1611 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1612 } else { 1613 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1614 } 1615 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1616 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1618 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1619 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1620 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1621 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1622 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1623 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1624 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1625 } 1626 /* Get submatrices from subdomain matrix */ 1627 if (pcbddc->benign_n) { 1628 IS dummy; 1629 Mat B0_R; 1630 PetscReal norm; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1634 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1635 if (norm > PETSC_SMALL) { 1636 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1637 } 1638 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1639 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1640 } 1641 1642 if (n_vertices) { 1643 IS is_aux; 1644 1645 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1646 IS tis; 1647 1648 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1649 ierr = ISSort(tis);CHKERRQ(ierr); 1650 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 } else { 1653 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1654 } 1655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1656 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1658 if (pcbddc->benign_n) { 1659 IS dummy; 1660 1661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1663 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1664 } 1665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1666 } 1667 1668 /* Matrix of coarse basis functions (local) */ 1669 if (pcbddc->coarse_phi_B) { 1670 PetscInt on_B,on_primal,on_D=n_D; 1671 if (pcbddc->coarse_phi_D) { 1672 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1673 } 1674 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1675 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1676 PetscScalar *marray; 1677 1678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1679 ierr = PetscFree(marray);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1681 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1684 } 1685 } 1686 1687 if (!pcbddc->coarse_phi_B) { 1688 PetscScalar *marray; 1689 1690 n = n_B*pcbddc->local_primal_size; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n += n_D*pcbddc->local_primal_size; 1693 } 1694 if (!pcbddc->symmetric_primal) { 1695 n *= 2; 1696 } 1697 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1702 n += n_D*pcbddc->local_primal_size; 1703 } 1704 if (!pcbddc->symmetric_primal) { 1705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 n = n_B*pcbddc->local_primal_size; 1708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 } 1710 } else { 1711 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1714 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1715 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1716 } 1717 } 1718 } 1719 1720 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1721 p0_lidx_I = NULL; 1722 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1723 const PetscInt *idxs; 1724 1725 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1727 for (i=0;i<pcbddc->benign_n;i++) { 1728 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1729 } 1730 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1731 } 1732 1733 /* vertices */ 1734 if (n_vertices) { 1735 1736 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1737 1738 if (n_R) { 1739 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1740 PetscBLASInt B_N,B_one = 1; 1741 PetscScalar *x,*y; 1742 PetscBool isseqaij; 1743 1744 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1745 if (need_benign_correction) { 1746 ISLocalToGlobalMapping RtoN; 1747 IS is_p0; 1748 PetscInt *idxs_p0,n; 1749 1750 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1751 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1752 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1753 if (n != pcbddc->benign_n) { 1754 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1755 } 1756 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1757 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1758 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1759 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1760 } 1761 1762 if (lda_rhs == n_R) { 1763 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1764 } else { 1765 PetscScalar *av,*array; 1766 const PetscInt *xadj,*adjncy; 1767 PetscInt n; 1768 PetscBool flg_row; 1769 1770 array = work+lda_rhs*n_vertices; 1771 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1772 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1774 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1775 for (i=0;i<n;i++) { 1776 PetscInt j; 1777 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1778 } 1779 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1782 } 1783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1784 if (F) { 1785 if (need_benign_correction) { 1786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1787 PetscScalar *marr; 1788 1789 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1790 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1791 1792 | 0 0 0 | (V) 1793 L = | 0 0 -1 | (P-p0) 1794 | 0 0 -1 | (p0) 1795 1796 */ 1797 for (i=0;i<reuse_solver->benign_n;i++) { 1798 const PetscScalar *vals; 1799 const PetscInt *idxs,*idxs_zero; 1800 PetscInt n,j,nz; 1801 1802 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1804 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1805 for (j=0;j<n;j++) { 1806 PetscScalar val = vals[j]; 1807 PetscInt k,col = idxs[j]; 1808 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1809 } 1810 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1812 } 1813 /* need to correct the rhs */ 1814 for (i=0;i<n_vertices;i++) { 1815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1816 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1818 } 1819 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1820 } 1821 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1822 /* need to correct the solution */ 1823 if (need_benign_correction) { 1824 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1825 PetscScalar *marr; 1826 1827 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1828 for (i=0;i<n_vertices;i++) { 1829 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1830 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 } 1835 } else { 1836 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1840 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1842 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1843 } 1844 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1845 } 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 /* S_VV and S_CV */ 1848 if (n_constraints) { 1849 Mat B; 1850 1851 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1852 for (i=0;i<n_vertices;i++) { 1853 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1854 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1859 } 1860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1861 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1862 ierr = MatDestroy(&B);CHKERRQ(ierr); 1863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1864 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1865 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 } 1870 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1871 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1872 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1873 } 1874 if (lda_rhs != n_R) { 1875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1878 } 1879 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1880 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr,*sums; 1884 1885 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1886 ierr = MatDenseGetArray(S_VVt,&marr); 1887 for (i=0;i<reuse_solver->benign_n;i++) { 1888 const PetscScalar *vals; 1889 const PetscInt *idxs,*idxs_zero; 1890 PetscInt n,j,nz; 1891 1892 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1893 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1894 for (j=0;j<n_vertices;j++) { 1895 PetscInt k; 1896 sums[j] = 0.; 1897 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1898 } 1899 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1900 for (j=0;j<n;j++) { 1901 PetscScalar val = vals[j]; 1902 PetscInt k; 1903 for (k=0;k<n_vertices;k++) { 1904 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1905 } 1906 } 1907 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1908 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1909 } 1910 ierr = PetscFree(sums);CHKERRQ(ierr); 1911 ierr = MatDenseRestoreArray(S_VVt,&marr); 1912 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1913 } 1914 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1919 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1921 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1922 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1925 } 1926 for (i=0;i<pcbddc->benign_n;i++) { 1927 const PetscScalar *vals; 1928 const PetscInt *idxs; 1929 PetscInt n,j,primal_idx; 1930 1931 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1932 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1933 for (j=0;j<n;j++) { 1934 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1935 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1936 } 1937 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 } 1939 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1940 1941 /* coarse basis functions */ 1942 for (i=0;i<n_vertices;i++) { 1943 PetscScalar *y; 1944 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 y[n_B*i+idx_V_B[i]] = 1.0; 1951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1952 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1953 1954 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1955 PetscInt j; 1956 1957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1962 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 } 1965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1966 } 1967 /* if n_R == 0 the object is not destroyed */ 1968 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1969 } 1970 1971 if (n_constraints) { 1972 Mat B; 1973 1974 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1975 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1976 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1977 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1978 if (n_vertices) { 1979 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1980 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1981 } else { 1982 Mat S_VCt; 1983 1984 if (lda_rhs != n_R) { 1985 ierr = MatDestroy(&B);CHKERRQ(ierr); 1986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1988 } 1989 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1992 } 1993 } 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<n_constraints;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2006 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2007 PetscInt j; 2008 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2014 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 } 2017 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2018 } 2019 } 2020 if (n_constraints) { 2021 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2022 } 2023 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2024 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2025 2026 /* compute other basis functions for non-symmetric problems */ 2027 if (!pcbddc->symmetric_primal) { 2028 Mat B_V=NULL,B_C=NULL; 2029 PetscScalar *marray; 2030 2031 if (n_constraints) { 2032 Mat S_CCT,C_CRT; 2033 2034 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2035 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2036 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2037 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2038 if (n_vertices) { 2039 Mat S_VCT; 2040 2041 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2042 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2043 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2044 } 2045 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2046 } 2047 if (n_vertices && n_R) { 2048 PetscScalar *av,*marray; 2049 const PetscInt *xadj,*adjncy; 2050 PetscInt n; 2051 PetscBool flg_row; 2052 2053 /* B_V = B_V - A_VR^T */ 2054 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2055 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2056 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2057 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2058 for (i=0;i<n;i++) { 2059 PetscInt j; 2060 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2061 } 2062 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2063 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2064 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2065 } 2066 2067 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2068 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2071 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2072 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2074 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2075 } 2076 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2078 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2079 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2080 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2081 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2083 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2084 } 2085 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2086 /* coarse basis functions */ 2087 for (i=0;i<pcbddc->local_primal_size;i++) { 2088 PetscScalar *y; 2089 2090 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2091 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2092 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 if (i<n_vertices) { 2096 y[n_B*i+idx_V_B[i]] = 1.0; 2097 } 2098 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2100 2101 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2102 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2103 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2104 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2107 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2108 } 2109 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2110 } 2111 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2112 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2113 } 2114 /* free memory */ 2115 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2116 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2117 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2118 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2119 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2120 ierr = PetscFree(work);CHKERRQ(ierr); 2121 if (n_vertices) { 2122 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2123 } 2124 if (n_constraints) { 2125 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2126 } 2127 /* Checking coarse_sub_mat and coarse basis functios */ 2128 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2129 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2130 if (pcbddc->dbg_flag) { 2131 Mat coarse_sub_mat; 2132 Mat AUXMAT,TM1,TM2,TM3,TM4; 2133 Mat coarse_phi_D,coarse_phi_B; 2134 Mat coarse_psi_D,coarse_psi_B; 2135 Mat A_II,A_BB,A_IB,A_BI; 2136 Mat C_B,CPHI; 2137 IS is_dummy; 2138 Vec mones; 2139 MatType checkmattype=MATSEQAIJ; 2140 PetscReal real_value; 2141 2142 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2143 Mat A; 2144 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2145 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2146 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2147 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2148 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2149 ierr = MatDestroy(&A);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2152 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2153 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2154 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2155 } 2156 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2157 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2158 if (!pcbddc->symmetric_primal) { 2159 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2160 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2161 } 2162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2163 2164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2167 if (!pcbddc->symmetric_primal) { 2168 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2169 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2171 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2172 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2174 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2178 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2182 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2183 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2184 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2185 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2186 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2187 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2188 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2189 } 2190 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2191 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2192 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2193 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2194 if (pcbddc->benign_n) { 2195 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 2196 PetscScalar *data,*data2; 2197 PetscInt j; 2198 2199 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2200 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 2201 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 2202 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2203 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2204 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2205 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2206 for (j=0;j<pcbddc->benign_n;j++) { 2207 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2208 for (i=0;i<pcbddc->local_primal_size;i++) { 2209 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2210 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2211 } 2212 } 2213 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2214 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2215 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2216 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2217 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2218 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 2219 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 2220 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2221 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 2222 } 2223 #if 0 2224 { 2225 PetscViewer viewer; 2226 char filename[256]; 2227 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2228 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2229 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2230 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2231 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2232 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2233 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2235 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2236 if (pcbddc->coarse_phi_D) { 2237 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2238 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2239 } 2240 if (pcbddc->coarse_psi_B) { 2241 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2242 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2243 } 2244 if (pcbddc->coarse_phi_B) { 2245 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2246 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2247 } 2248 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2249 } 2250 #endif 2251 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2252 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2254 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2255 2256 /* check constraints */ 2257 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2258 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2259 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2260 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2261 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2262 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2263 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2264 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2266 if (!pcbddc->symmetric_primal) { 2267 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2268 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2269 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2270 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2272 } 2273 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2274 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2275 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2276 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2277 } 2278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2279 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2280 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2281 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2282 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2283 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2284 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2285 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2286 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2287 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2288 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2289 if (!pcbddc->symmetric_primal) { 2290 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2291 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2292 } 2293 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2294 } 2295 /* get back data */ 2296 *coarse_submat_vals_n = coarse_submat_vals; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2302 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2303 { 2304 Mat *work_mat; 2305 IS isrow_s,iscol_s; 2306 PetscBool rsorted,csorted; 2307 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2312 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2313 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2314 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2315 2316 if (!rsorted) { 2317 const PetscInt *idxs; 2318 PetscInt *idxs_sorted,i; 2319 2320 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2321 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2322 for (i=0;i<rsize;i++) { 2323 idxs_perm_r[i] = i; 2324 } 2325 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2326 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2327 for (i=0;i<rsize;i++) { 2328 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2329 } 2330 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2331 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2332 } else { 2333 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2334 isrow_s = isrow; 2335 } 2336 2337 if (!csorted) { 2338 if (isrow == iscol) { 2339 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2340 iscol_s = isrow_s; 2341 } else { 2342 const PetscInt *idxs; 2343 PetscInt *idxs_sorted,i; 2344 2345 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2346 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2347 for (i=0;i<csize;i++) { 2348 idxs_perm_c[i] = i; 2349 } 2350 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2351 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2352 for (i=0;i<csize;i++) { 2353 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2354 } 2355 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2356 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2357 } 2358 } else { 2359 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2360 iscol_s = iscol; 2361 } 2362 2363 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2364 2365 if (!rsorted || !csorted) { 2366 Mat new_mat; 2367 IS is_perm_r,is_perm_c; 2368 2369 if (!rsorted) { 2370 PetscInt *idxs_r,i; 2371 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2372 for (i=0;i<rsize;i++) { 2373 idxs_r[idxs_perm_r[i]] = i; 2374 } 2375 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2376 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2377 } else { 2378 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2379 } 2380 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2381 2382 if (!csorted) { 2383 if (isrow_s == iscol_s) { 2384 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2385 is_perm_c = is_perm_r; 2386 } else { 2387 PetscInt *idxs_c,i; 2388 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2389 for (i=0;i<csize;i++) { 2390 idxs_c[idxs_perm_c[i]] = i; 2391 } 2392 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2394 } 2395 } else { 2396 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2397 } 2398 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2399 2400 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2401 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2402 work_mat[0] = new_mat; 2403 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2404 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2405 } 2406 2407 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2408 *B = work_mat[0]; 2409 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2410 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2411 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2417 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2418 { 2419 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2420 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2421 Mat new_mat; 2422 IS is_local,is_global; 2423 PetscInt local_size; 2424 PetscBool isseqaij; 2425 PetscErrorCode ierr; 2426 2427 PetscFunctionBegin; 2428 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2429 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2430 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2431 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2432 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2433 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2434 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2435 2436 /* check */ 2437 if (pcbddc->dbg_flag) { 2438 Vec x,x_change; 2439 PetscReal error; 2440 2441 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2442 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2443 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2444 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2445 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2446 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2447 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2449 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2450 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2453 ierr = VecDestroy(&x);CHKERRQ(ierr); 2454 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2455 } 2456 2457 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2458 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2459 if (isseqaij) { 2460 Mat M; 2461 2462 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2463 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2464 ierr = MatDestroy(&M);CHKERRQ(ierr); 2465 } else { 2466 Mat work_mat,M; 2467 2468 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2469 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2470 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2471 ierr = MatDestroy(&M);CHKERRQ(ierr); 2472 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2473 } 2474 if (matis->A->symmetric_set) { 2475 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2476 #if !defined(PETSC_USE_COMPLEX) 2477 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2478 #endif 2479 } 2480 /* 2481 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2482 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2483 */ 2484 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2490 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2491 { 2492 PC_IS* pcis = (PC_IS*)(pc->data); 2493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2494 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2495 PetscInt *idx_R_local=NULL; 2496 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2497 PetscInt vbs,bs; 2498 PetscBT bitmask=NULL; 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 /* 2503 No need to setup local scatters if 2504 - primal space is unchanged 2505 AND 2506 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2507 AND 2508 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2509 */ 2510 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2511 PetscFunctionReturn(0); 2512 } 2513 /* destroy old objects */ 2514 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2515 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2516 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2517 /* Set Non-overlapping dimensions */ 2518 n_B = pcis->n_B; 2519 n_D = pcis->n - n_B; 2520 n_vertices = pcbddc->n_vertices; 2521 2522 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2523 2524 /* create auxiliary bitmask and allocate workspace */ 2525 if (!sub_schurs->reuse_solver) { 2526 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2527 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2528 for (i=0;i<n_vertices;i++) { 2529 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2530 } 2531 2532 for (i=0, n_R=0; i<pcis->n; i++) { 2533 if (!PetscBTLookup(bitmask,i)) { 2534 idx_R_local[n_R++] = i; 2535 } 2536 } 2537 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2538 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2539 2540 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2541 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2542 } 2543 2544 /* Block code */ 2545 vbs = 1; 2546 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2547 if (bs>1 && !(n_vertices%bs)) { 2548 PetscBool is_blocked = PETSC_TRUE; 2549 PetscInt *vary; 2550 if (!sub_schurs->reuse_solver) { 2551 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2552 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2553 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2554 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2555 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2556 for (i=0; i<pcis->n/bs; i++) { 2557 if (vary[i]!=0 && vary[i]!=bs) { 2558 is_blocked = PETSC_FALSE; 2559 break; 2560 } 2561 } 2562 ierr = PetscFree(vary);CHKERRQ(ierr); 2563 } else { 2564 /* Verify directly the R set */ 2565 for (i=0; i<n_R/bs; i++) { 2566 PetscInt j,node=idx_R_local[bs*i]; 2567 for (j=1; j<bs; j++) { 2568 if (node != idx_R_local[bs*i+j]-j) { 2569 is_blocked = PETSC_FALSE; 2570 break; 2571 } 2572 } 2573 } 2574 } 2575 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2576 vbs = bs; 2577 for (i=0;i<n_R/vbs;i++) { 2578 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2579 } 2580 } 2581 } 2582 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2583 if (sub_schurs->reuse_solver) { 2584 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2585 2586 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2587 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2588 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2589 reuse_solver->is_R = pcbddc->is_R_local; 2590 } else { 2591 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2592 } 2593 2594 /* print some info if requested */ 2595 if (pcbddc->dbg_flag) { 2596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2598 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2602 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2603 } 2604 2605 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2606 if (!sub_schurs->reuse_solver) { 2607 IS is_aux1,is_aux2; 2608 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2609 2610 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2611 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2612 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 for (i=0; i<n_D; i++) { 2615 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2616 } 2617 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2618 for (i=0, j=0; i<n_R; i++) { 2619 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2620 aux_array1[j++] = i; 2621 } 2622 } 2623 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2624 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2625 for (i=0, j=0; i<n_B; i++) { 2626 if (!PetscBTLookup(bitmask,is_indices[i])) { 2627 aux_array2[j++] = i; 2628 } 2629 } 2630 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2632 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2633 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2634 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2635 2636 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2637 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2638 for (i=0, j=0; i<n_R; i++) { 2639 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2640 aux_array1[j++] = i; 2641 } 2642 } 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2644 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2646 } 2647 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2649 } else { 2650 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2651 IS tis; 2652 PetscInt schur_size; 2653 2654 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2655 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2657 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2658 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2659 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2660 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2661 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2662 } 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2670 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2671 { 2672 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2673 PC_IS *pcis = (PC_IS*)pc->data; 2674 PC pc_temp; 2675 Mat A_RR; 2676 MatReuse reuse; 2677 PetscScalar m_one = -1.0; 2678 PetscReal value; 2679 PetscInt n_D,n_R; 2680 PetscBool use_exact,use_exact_reduced,issbaij; 2681 PetscErrorCode ierr; 2682 /* prefixes stuff */ 2683 char dir_prefix[256],neu_prefix[256],str_level[16]; 2684 size_t len; 2685 2686 PetscFunctionBegin; 2687 2688 /* compute prefixes */ 2689 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2690 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2691 if (!pcbddc->current_level) { 2692 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2693 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2694 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2695 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2696 } else { 2697 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2698 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2699 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2700 len -= 15; /* remove "pc_bddc_coarse_" */ 2701 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2702 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2703 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2704 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2705 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2706 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2707 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2708 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2709 } 2710 2711 /* DIRICHLET PROBLEM */ 2712 if (dirichlet) { 2713 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2714 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2715 if (!sub_schurs->reuse_solver) { 2716 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2717 } 2718 if (pcbddc->dbg_flag) { 2719 Mat A_IIn; 2720 2721 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2722 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2723 pcis->A_II = A_IIn; 2724 } 2725 } 2726 if (pcbddc->local_mat->symmetric_set) { 2727 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2728 } 2729 /* Matrix for Dirichlet problem is pcis->A_II */ 2730 n_D = pcis->n - pcis->n_B; 2731 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2732 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2733 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2734 /* default */ 2735 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2736 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2737 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2738 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2739 if (issbaij) { 2740 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2741 } else { 2742 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2743 } 2744 /* Allow user's customization */ 2745 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2746 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2747 } 2748 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2749 if (sub_schurs->reuse_solver) { 2750 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2751 2752 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2753 } 2754 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2755 if (!n_D) { 2756 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2757 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2758 } 2759 /* Set Up KSP for Dirichlet problem of BDDC */ 2760 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2761 /* set ksp_D into pcis data */ 2762 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2763 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2764 pcis->ksp_D = pcbddc->ksp_D; 2765 } 2766 2767 /* NEUMANN PROBLEM */ 2768 A_RR = 0; 2769 if (neumann) { 2770 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2771 PetscInt ibs,mbs; 2772 PetscBool issbaij; 2773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2774 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2775 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2776 if (pcbddc->ksp_R) { /* already created ksp */ 2777 PetscInt nn_R; 2778 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2779 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2780 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2781 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2782 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2783 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2784 reuse = MAT_INITIAL_MATRIX; 2785 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2786 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2787 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2788 reuse = MAT_INITIAL_MATRIX; 2789 } else { /* safe to reuse the matrix */ 2790 reuse = MAT_REUSE_MATRIX; 2791 } 2792 } 2793 /* last check */ 2794 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2795 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2796 reuse = MAT_INITIAL_MATRIX; 2797 } 2798 } else { /* first time, so we need to create the matrix */ 2799 reuse = MAT_INITIAL_MATRIX; 2800 } 2801 /* extract A_RR */ 2802 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2803 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2804 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2805 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2806 if (matis->A == pcbddc->local_mat) { 2807 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2808 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2809 } else { 2810 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2811 } 2812 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2813 if (matis->A == pcbddc->local_mat) { 2814 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2815 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2816 } else { 2817 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2818 } 2819 } 2820 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2821 if (pcbddc->local_mat->symmetric_set) { 2822 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2823 } 2824 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2825 Mat A_RRn; 2826 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2827 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2828 A_RR = A_RRn; 2829 } 2830 if (!pcbddc->ksp_R) { /* create object if not present */ 2831 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2832 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2833 /* default */ 2834 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2835 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2836 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2837 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2838 if (issbaij) { 2839 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2840 } else { 2841 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2842 } 2843 /* Allow user's customization */ 2844 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2845 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2846 } 2847 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2848 if (!n_R) { 2849 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2850 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2851 } 2852 /* Reuse solver if it is present */ 2853 if (sub_schurs->reuse_solver) { 2854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2855 2856 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2857 } 2858 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2859 /* Set Up KSP for Neumann problem of BDDC */ 2860 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2861 } 2862 /* free Neumann problem's matrix */ 2863 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2864 2865 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2866 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2867 if (pcbddc->dbg_flag) { 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2871 } 2872 if (dirichlet) { /* Dirichlet */ 2873 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2874 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2875 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2876 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2877 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2878 /* need to be adapted? */ 2879 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2880 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2881 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2882 /* print info */ 2883 if (pcbddc->dbg_flag) { 2884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2886 } 2887 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2888 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2889 } 2890 } 2891 if (neumann) { /* Neumann */ 2892 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2893 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2894 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2895 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2896 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2897 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2898 /* need to be adapted? */ 2899 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2900 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2901 /* print info */ 2902 if (pcbddc->dbg_flag) { 2903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2905 } 2906 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2907 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2908 } 2909 } 2910 } 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2916 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2917 { 2918 PetscErrorCode ierr; 2919 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2920 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2921 2922 PetscFunctionBegin; 2923 if (!sub_schurs->reuse_solver) { 2924 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2925 } 2926 if (!pcbddc->switch_static) { 2927 if (applytranspose && pcbddc->local_auxmat1) { 2928 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2929 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2930 } 2931 if (!sub_schurs->reuse_solver) { 2932 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 } else { 2935 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2936 2937 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2938 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 } 2940 } else { 2941 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2942 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2943 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 if (applytranspose && pcbddc->local_auxmat1) { 2946 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2947 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2948 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950 } 2951 } 2952 if (!sub_schurs->reuse_solver) { 2953 if (applytranspose) { 2954 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2955 } else { 2956 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2957 } 2958 } else { 2959 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2960 2961 if (applytranspose) { 2962 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2963 } else { 2964 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2965 } 2966 } 2967 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2968 if (!pcbddc->switch_static) { 2969 if (!sub_schurs->reuse_solver) { 2970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2972 } else { 2973 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2974 2975 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2976 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2977 } 2978 if (!applytranspose && pcbddc->local_auxmat1) { 2979 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2980 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2981 } 2982 } else { 2983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2985 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 if (!applytranspose && pcbddc->local_auxmat1) { 2988 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2989 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2990 } 2991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2993 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2994 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3002 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3003 { 3004 PetscErrorCode ierr; 3005 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3006 PC_IS* pcis = (PC_IS*) (pc->data); 3007 const PetscScalar zero = 0.0; 3008 3009 PetscFunctionBegin; 3010 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3011 if (!pcbddc->benign_apply_coarse_only) { 3012 if (applytranspose) { 3013 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3014 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3015 } else { 3016 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3017 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3018 } 3019 } else { 3020 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3021 } 3022 3023 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3024 if (pcbddc->benign_n) { 3025 PetscScalar *array; 3026 PetscInt j; 3027 3028 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3029 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3030 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3031 } 3032 3033 /* start communications from local primal nodes to rhs of coarse solver */ 3034 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3035 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3036 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 3038 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3039 /* TODO remove null space when doing multilevel */ 3040 if (pcbddc->coarse_ksp) { 3041 Mat coarse_mat; 3042 Vec rhs,sol; 3043 MatNullSpace nullsp; 3044 3045 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3046 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3047 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3048 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3049 if (nullsp) { 3050 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3051 } 3052 if (applytranspose) { 3053 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3054 } else { 3055 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3056 } 3057 if (nullsp) { 3058 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3059 } 3060 } 3061 3062 /* Local solution on R nodes */ 3063 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3064 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3065 } 3066 /* communications from coarse sol to local primal nodes */ 3067 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3068 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 3070 /* Sum contributions from the two levels */ 3071 if (!pcbddc->benign_apply_coarse_only) { 3072 if (applytranspose) { 3073 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3074 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3075 } else { 3076 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3077 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3078 } 3079 /* store p0 */ 3080 if (pcbddc->benign_n) { 3081 PetscScalar *array; 3082 PetscInt j; 3083 3084 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3085 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3086 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3087 } 3088 } else { /* expand the coarse solution */ 3089 if (applytranspose) { 3090 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3091 } else { 3092 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3093 } 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3100 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3101 { 3102 PetscErrorCode ierr; 3103 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3104 PetscScalar *array; 3105 Vec from,to; 3106 3107 PetscFunctionBegin; 3108 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3109 from = pcbddc->coarse_vec; 3110 to = pcbddc->vec1_P; 3111 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3112 Vec tvec; 3113 3114 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3115 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3116 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3117 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3118 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3120 } 3121 } else { /* from local to global -> put data in coarse right hand side */ 3122 from = pcbddc->vec1_P; 3123 to = pcbddc->coarse_vec; 3124 } 3125 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3131 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3132 { 3133 PetscErrorCode ierr; 3134 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3135 PetscScalar *array; 3136 Vec from,to; 3137 3138 PetscFunctionBegin; 3139 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3140 from = pcbddc->coarse_vec; 3141 to = pcbddc->vec1_P; 3142 } else { /* from local to global -> put data in coarse right hand side */ 3143 from = pcbddc->vec1_P; 3144 to = pcbddc->coarse_vec; 3145 } 3146 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3147 if (smode == SCATTER_FORWARD) { 3148 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3149 Vec tvec; 3150 3151 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3152 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3153 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3154 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3155 } 3156 } else { 3157 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3158 ierr = VecResetArray(from);CHKERRQ(ierr); 3159 } 3160 } 3161 PetscFunctionReturn(0); 3162 } 3163 3164 /* uncomment for testing purposes */ 3165 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3168 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3169 { 3170 PetscErrorCode ierr; 3171 PC_IS* pcis = (PC_IS*)(pc->data); 3172 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3173 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3174 /* one and zero */ 3175 PetscScalar one=1.0,zero=0.0; 3176 /* space to store constraints and their local indices */ 3177 PetscScalar *constraints_data; 3178 PetscInt *constraints_idxs,*constraints_idxs_B; 3179 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3180 PetscInt *constraints_n; 3181 /* iterators */ 3182 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3183 /* BLAS integers */ 3184 PetscBLASInt lwork,lierr; 3185 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3186 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3187 /* reuse */ 3188 PetscInt olocal_primal_size,olocal_primal_size_cc; 3189 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3190 /* change of basis */ 3191 PetscBool qr_needed; 3192 PetscBT change_basis,qr_needed_idx; 3193 /* auxiliary stuff */ 3194 PetscInt *nnz,*is_indices; 3195 PetscInt ncc; 3196 /* some quantities */ 3197 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3198 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3199 3200 PetscFunctionBegin; 3201 /* Destroy Mat objects computed previously */ 3202 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3203 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3204 /* save info on constraints from previous setup (if any) */ 3205 olocal_primal_size = pcbddc->local_primal_size; 3206 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3207 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3208 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3209 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3210 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3211 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3212 3213 /* print some info */ 3214 if (pcbddc->dbg_flag) { 3215 IS vertices; 3216 PetscInt nv,nedges,nfaces; 3217 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3218 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3219 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3220 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3226 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 } 3228 3229 if (!pcbddc->adaptive_selection) { 3230 IS ISForVertices,*ISForFaces,*ISForEdges; 3231 MatNullSpace nearnullsp; 3232 const Vec *nearnullvecs; 3233 Vec *localnearnullsp; 3234 PetscScalar *array; 3235 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3236 PetscBool nnsp_has_cnst; 3237 /* LAPACK working arrays for SVD or POD */ 3238 PetscBool skip_lapack,boolforchange; 3239 PetscScalar *work; 3240 PetscReal *singular_vals; 3241 #if defined(PETSC_USE_COMPLEX) 3242 PetscReal *rwork; 3243 #endif 3244 #if defined(PETSC_MISSING_LAPACK_GESVD) 3245 PetscScalar *temp_basis,*correlation_mat; 3246 #else 3247 PetscBLASInt dummy_int=1; 3248 PetscScalar dummy_scalar=1.; 3249 #endif 3250 3251 /* Get index sets for faces, edges and vertices from graph */ 3252 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3253 /* free unneeded index sets */ 3254 if (!pcbddc->use_vertices) { 3255 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3256 } 3257 if (!pcbddc->use_edges) { 3258 for (i=0;i<n_ISForEdges;i++) { 3259 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3260 } 3261 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3262 n_ISForEdges = 0; 3263 } 3264 if (!pcbddc->use_faces) { 3265 for (i=0;i<n_ISForFaces;i++) { 3266 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3267 } 3268 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3269 n_ISForFaces = 0; 3270 } 3271 3272 #if defined(PETSC_USE_DEBUG) 3273 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3274 Also use_change_of_basis should be consistent among processors */ 3275 if (pcbddc->NullSpace) { 3276 PetscBool tbool[2],gbool[2]; 3277 3278 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3279 pcbddc->use_change_of_basis = PETSC_TRUE; 3280 if (!ISForEdges) { 3281 pcbddc->use_change_on_faces = PETSC_TRUE; 3282 } 3283 } 3284 tbool[0] = pcbddc->use_change_of_basis; 3285 tbool[1] = pcbddc->use_change_on_faces; 3286 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3287 pcbddc->use_change_of_basis = gbool[0]; 3288 pcbddc->use_change_on_faces = gbool[1]; 3289 } 3290 #endif 3291 3292 /* check if near null space is attached to global mat */ 3293 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3294 if (nearnullsp) { 3295 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3296 /* remove any stored info */ 3297 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3298 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3299 /* store information for BDDC solver reuse */ 3300 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3301 pcbddc->onearnullspace = nearnullsp; 3302 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3303 for (i=0;i<nnsp_size;i++) { 3304 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3305 } 3306 } else { /* if near null space is not provided BDDC uses constants by default */ 3307 nnsp_size = 0; 3308 nnsp_has_cnst = PETSC_TRUE; 3309 } 3310 /* get max number of constraints on a single cc */ 3311 max_constraints = nnsp_size; 3312 if (nnsp_has_cnst) max_constraints++; 3313 3314 /* 3315 Evaluate maximum storage size needed by the procedure 3316 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3317 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3318 There can be multiple constraints per connected component 3319 */ 3320 n_vertices = 0; 3321 if (ISForVertices) { 3322 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3323 } 3324 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3325 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3326 3327 total_counts = n_ISForFaces+n_ISForEdges; 3328 total_counts *= max_constraints; 3329 total_counts += n_vertices; 3330 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3331 3332 total_counts = 0; 3333 max_size_of_constraint = 0; 3334 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3335 IS used_is; 3336 if (i<n_ISForEdges) { 3337 used_is = ISForEdges[i]; 3338 } else { 3339 used_is = ISForFaces[i-n_ISForEdges]; 3340 } 3341 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3342 total_counts += j; 3343 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3344 } 3345 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3346 3347 /* get local part of global near null space vectors */ 3348 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3349 for (k=0;k<nnsp_size;k++) { 3350 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3351 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3352 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3353 } 3354 3355 /* whether or not to skip lapack calls */ 3356 skip_lapack = PETSC_TRUE; 3357 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3358 3359 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3360 if (!skip_lapack) { 3361 PetscScalar temp_work; 3362 3363 #if defined(PETSC_MISSING_LAPACK_GESVD) 3364 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3365 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3366 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3368 #if defined(PETSC_USE_COMPLEX) 3369 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3370 #endif 3371 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3372 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3373 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3374 lwork = -1; 3375 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3376 #if !defined(PETSC_USE_COMPLEX) 3377 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3378 #else 3379 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3380 #endif 3381 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3382 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3383 #else /* on missing GESVD */ 3384 /* SVD */ 3385 PetscInt max_n,min_n; 3386 max_n = max_size_of_constraint; 3387 min_n = max_constraints; 3388 if (max_size_of_constraint < max_constraints) { 3389 min_n = max_size_of_constraint; 3390 max_n = max_constraints; 3391 } 3392 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3393 #if defined(PETSC_USE_COMPLEX) 3394 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3395 #endif 3396 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3397 lwork = -1; 3398 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3399 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3400 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3401 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3402 #if !defined(PETSC_USE_COMPLEX) 3403 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3404 #else 3405 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3406 #endif 3407 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3409 #endif /* on missing GESVD */ 3410 /* Allocate optimal workspace */ 3411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3412 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3413 } 3414 /* Now we can loop on constraining sets */ 3415 total_counts = 0; 3416 constraints_idxs_ptr[0] = 0; 3417 constraints_data_ptr[0] = 0; 3418 /* vertices */ 3419 if (n_vertices) { 3420 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3421 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3422 for (i=0;i<n_vertices;i++) { 3423 constraints_n[total_counts] = 1; 3424 constraints_data[total_counts] = 1.0; 3425 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3426 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3427 total_counts++; 3428 } 3429 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3430 n_vertices = total_counts; 3431 } 3432 3433 /* edges and faces */ 3434 total_counts_cc = total_counts; 3435 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3436 IS used_is; 3437 PetscBool idxs_copied = PETSC_FALSE; 3438 3439 if (ncc<n_ISForEdges) { 3440 used_is = ISForEdges[ncc]; 3441 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3442 } else { 3443 used_is = ISForFaces[ncc-n_ISForEdges]; 3444 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3445 } 3446 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3447 3448 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3449 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3450 /* change of basis should not be performed on local periodic nodes */ 3451 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3452 if (nnsp_has_cnst) { 3453 PetscScalar quad_value; 3454 3455 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3456 idxs_copied = PETSC_TRUE; 3457 3458 if (!pcbddc->use_nnsp_true) { 3459 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3460 } else { 3461 quad_value = 1.0; 3462 } 3463 for (j=0;j<size_of_constraint;j++) { 3464 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3465 } 3466 temp_constraints++; 3467 total_counts++; 3468 } 3469 for (k=0;k<nnsp_size;k++) { 3470 PetscReal real_value; 3471 PetscScalar *ptr_to_data; 3472 3473 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3474 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3475 for (j=0;j<size_of_constraint;j++) { 3476 ptr_to_data[j] = array[is_indices[j]]; 3477 } 3478 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3479 /* check if array is null on the connected component */ 3480 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3481 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3482 if (real_value > 0.0) { /* keep indices and values */ 3483 temp_constraints++; 3484 total_counts++; 3485 if (!idxs_copied) { 3486 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3487 idxs_copied = PETSC_TRUE; 3488 } 3489 } 3490 } 3491 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3492 valid_constraints = temp_constraints; 3493 if (!pcbddc->use_nnsp_true && temp_constraints) { 3494 if (temp_constraints == 1) { /* just normalize the constraint */ 3495 PetscScalar norm,*ptr_to_data; 3496 3497 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3499 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3500 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3501 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3502 } else { /* perform SVD */ 3503 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3504 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3505 3506 #if defined(PETSC_MISSING_LAPACK_GESVD) 3507 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3508 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3509 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3510 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3511 from that computed using LAPACKgesvd 3512 -> This is due to a different computation of eigenvectors in LAPACKheev 3513 -> The quality of the POD-computed basis will be the same */ 3514 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3515 /* Store upper triangular part of correlation matrix */ 3516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3518 for (j=0;j<temp_constraints;j++) { 3519 for (k=0;k<j+1;k++) { 3520 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3521 } 3522 } 3523 /* compute eigenvalues and eigenvectors of correlation matrix */ 3524 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3525 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3526 #if !defined(PETSC_USE_COMPLEX) 3527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3528 #else 3529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3530 #endif 3531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3533 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3534 j = 0; 3535 while (j < temp_constraints && singular_vals[j] < tol) j++; 3536 total_counts = total_counts-j; 3537 valid_constraints = temp_constraints-j; 3538 /* scale and copy POD basis into used quadrature memory */ 3539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3540 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3541 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3543 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3545 if (j<temp_constraints) { 3546 PetscInt ii; 3547 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3548 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3549 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3551 for (k=0;k<temp_constraints-j;k++) { 3552 for (ii=0;ii<size_of_constraint;ii++) { 3553 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3554 } 3555 } 3556 } 3557 #else /* on missing GESVD */ 3558 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3559 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3561 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3562 #if !defined(PETSC_USE_COMPLEX) 3563 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3564 #else 3565 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3566 #endif 3567 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3569 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3570 k = temp_constraints; 3571 if (k > size_of_constraint) k = size_of_constraint; 3572 j = 0; 3573 while (j < k && singular_vals[k-j-1] < tol) j++; 3574 valid_constraints = k-j; 3575 total_counts = total_counts-temp_constraints+valid_constraints; 3576 #endif /* on missing GESVD */ 3577 } 3578 } 3579 /* update pointers information */ 3580 if (valid_constraints) { 3581 constraints_n[total_counts_cc] = valid_constraints; 3582 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3583 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3584 /* set change_of_basis flag */ 3585 if (boolforchange) { 3586 PetscBTSet(change_basis,total_counts_cc); 3587 } 3588 total_counts_cc++; 3589 } 3590 } 3591 /* free workspace */ 3592 if (!skip_lapack) { 3593 ierr = PetscFree(work);CHKERRQ(ierr); 3594 #if defined(PETSC_USE_COMPLEX) 3595 ierr = PetscFree(rwork);CHKERRQ(ierr); 3596 #endif 3597 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3598 #if defined(PETSC_MISSING_LAPACK_GESVD) 3599 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3600 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3601 #endif 3602 } 3603 for (k=0;k<nnsp_size;k++) { 3604 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3605 } 3606 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3607 /* free index sets of faces, edges and vertices */ 3608 for (i=0;i<n_ISForFaces;i++) { 3609 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3610 } 3611 if (n_ISForFaces) { 3612 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3613 } 3614 for (i=0;i<n_ISForEdges;i++) { 3615 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3616 } 3617 if (n_ISForEdges) { 3618 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3619 } 3620 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3621 } else { 3622 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3623 3624 total_counts = 0; 3625 n_vertices = 0; 3626 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3627 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3628 } 3629 max_constraints = 0; 3630 total_counts_cc = 0; 3631 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3632 total_counts += pcbddc->adaptive_constraints_n[i]; 3633 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3634 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3635 } 3636 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3637 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3638 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3639 constraints_data = pcbddc->adaptive_constraints_data; 3640 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3641 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3642 total_counts_cc = 0; 3643 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3644 if (pcbddc->adaptive_constraints_n[i]) { 3645 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3646 } 3647 } 3648 #if 0 3649 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3650 for (i=0;i<total_counts_cc;i++) { 3651 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3652 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3653 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3654 printf(" %d",constraints_idxs[j]); 3655 } 3656 printf("\n"); 3657 printf("number of cc: %d\n",constraints_n[i]); 3658 } 3659 for (i=0;i<n_vertices;i++) { 3660 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3661 } 3662 for (i=0;i<sub_schurs->n_subs;i++) { 3663 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3664 } 3665 #endif 3666 3667 max_size_of_constraint = 0; 3668 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3669 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3670 /* Change of basis */ 3671 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3672 if (pcbddc->use_change_of_basis) { 3673 for (i=0;i<sub_schurs->n_subs;i++) { 3674 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3675 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3676 } 3677 } 3678 } 3679 } 3680 pcbddc->local_primal_size = total_counts; 3681 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3682 3683 /* map constraints_idxs in boundary numbering */ 3684 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3685 if (i != constraints_idxs_ptr[total_counts_cc]) { 3686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3687 } 3688 3689 /* Create constraint matrix */ 3690 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3691 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3692 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3693 3694 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3695 /* determine if a QR strategy is needed for change of basis */ 3696 qr_needed = PETSC_FALSE; 3697 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3698 total_primal_vertices=0; 3699 pcbddc->local_primal_size_cc = 0; 3700 for (i=0;i<total_counts_cc;i++) { 3701 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3702 if (size_of_constraint == 1) { 3703 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3704 pcbddc->local_primal_size_cc += 1; 3705 } else if (PetscBTLookup(change_basis,i)) { 3706 for (k=0;k<constraints_n[i];k++) { 3707 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3708 } 3709 pcbddc->local_primal_size_cc += constraints_n[i]; 3710 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3711 PetscBTSet(qr_needed_idx,i); 3712 qr_needed = PETSC_TRUE; 3713 } 3714 } else { 3715 pcbddc->local_primal_size_cc += 1; 3716 } 3717 } 3718 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3719 pcbddc->n_vertices = total_primal_vertices; 3720 /* permute indices in order to have a sorted set of vertices */ 3721 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3722 3723 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3724 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3725 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3726 3727 /* nonzero structure of constraint matrix */ 3728 /* and get reference dof for local constraints */ 3729 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3730 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3731 3732 j = total_primal_vertices; 3733 total_counts = total_primal_vertices; 3734 cum = total_primal_vertices; 3735 for (i=n_vertices;i<total_counts_cc;i++) { 3736 if (!PetscBTLookup(change_basis,i)) { 3737 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3738 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3739 cum++; 3740 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3741 for (k=0;k<constraints_n[i];k++) { 3742 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3743 nnz[j+k] = size_of_constraint; 3744 } 3745 j += constraints_n[i]; 3746 } 3747 } 3748 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3749 ierr = PetscFree(nnz);CHKERRQ(ierr); 3750 3751 /* set values in constraint matrix */ 3752 for (i=0;i<total_primal_vertices;i++) { 3753 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3754 } 3755 total_counts = total_primal_vertices; 3756 for (i=n_vertices;i<total_counts_cc;i++) { 3757 if (!PetscBTLookup(change_basis,i)) { 3758 PetscInt *cols; 3759 3760 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3761 cols = constraints_idxs+constraints_idxs_ptr[i]; 3762 for (k=0;k<constraints_n[i];k++) { 3763 PetscInt row = total_counts+k; 3764 PetscScalar *vals; 3765 3766 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3767 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3768 } 3769 total_counts += constraints_n[i]; 3770 } 3771 } 3772 /* assembling */ 3773 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3774 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 3776 /* 3777 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3778 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3779 */ 3780 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3781 if (pcbddc->use_change_of_basis) { 3782 /* dual and primal dofs on a single cc */ 3783 PetscInt dual_dofs,primal_dofs; 3784 /* working stuff for GEQRF */ 3785 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3786 PetscBLASInt lqr_work; 3787 /* working stuff for UNGQR */ 3788 PetscScalar *gqr_work,lgqr_work_t; 3789 PetscBLASInt lgqr_work; 3790 /* working stuff for TRTRS */ 3791 PetscScalar *trs_rhs; 3792 PetscBLASInt Blas_NRHS; 3793 /* pointers for values insertion into change of basis matrix */ 3794 PetscInt *start_rows,*start_cols; 3795 PetscScalar *start_vals; 3796 /* working stuff for values insertion */ 3797 PetscBT is_primal; 3798 PetscInt *aux_primal_numbering_B; 3799 /* matrix sizes */ 3800 PetscInt global_size,local_size; 3801 /* temporary change of basis */ 3802 Mat localChangeOfBasisMatrix; 3803 /* extra space for debugging */ 3804 PetscScalar *dbg_work; 3805 3806 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3807 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3808 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3809 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3810 /* nonzeros for local mat */ 3811 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3812 for (i=0;i<pcis->n;i++) nnz[i]=1; 3813 for (i=n_vertices;i<total_counts_cc;i++) { 3814 if (PetscBTLookup(change_basis,i)) { 3815 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3816 if (PetscBTLookup(qr_needed_idx,i)) { 3817 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3818 } else { 3819 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3820 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3821 } 3822 } 3823 } 3824 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3825 ierr = PetscFree(nnz);CHKERRQ(ierr); 3826 /* Set initial identity in the matrix */ 3827 for (i=0;i<pcis->n;i++) { 3828 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3829 } 3830 3831 if (pcbddc->dbg_flag) { 3832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3834 } 3835 3836 3837 /* Now we loop on the constraints which need a change of basis */ 3838 /* 3839 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3840 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3841 3842 Basic blocks of change of basis matrix T computed by 3843 3844 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3845 3846 | 1 0 ... 0 s_1/S | 3847 | 0 1 ... 0 s_2/S | 3848 | ... | 3849 | 0 ... 1 s_{n-1}/S | 3850 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3851 3852 with S = \sum_{i=1}^n s_i^2 3853 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3854 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3855 3856 - QR decomposition of constraints otherwise 3857 */ 3858 if (qr_needed) { 3859 /* space to store Q */ 3860 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3861 /* first we issue queries for optimal work */ 3862 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3863 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3864 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3865 lqr_work = -1; 3866 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3867 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3868 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3869 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3870 lgqr_work = -1; 3871 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3872 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3873 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3874 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3875 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3876 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3877 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3878 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3879 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3880 /* array to store scaling factors for reflectors */ 3881 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3882 /* array to store rhs and solution of triangular solver */ 3883 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3884 /* allocating workspace for check */ 3885 if (pcbddc->dbg_flag) { 3886 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3887 } 3888 } 3889 /* array to store whether a node is primal or not */ 3890 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3891 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3892 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3893 if (i != total_primal_vertices) { 3894 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3895 } 3896 for (i=0;i<total_primal_vertices;i++) { 3897 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3898 } 3899 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3900 3901 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3902 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3903 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3904 if (PetscBTLookup(change_basis,total_counts)) { 3905 /* get constraint info */ 3906 primal_dofs = constraints_n[total_counts]; 3907 dual_dofs = size_of_constraint-primal_dofs; 3908 3909 if (pcbddc->dbg_flag) { 3910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3911 } 3912 3913 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3914 3915 /* copy quadrature constraints for change of basis check */ 3916 if (pcbddc->dbg_flag) { 3917 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3918 } 3919 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3920 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3921 3922 /* compute QR decomposition of constraints */ 3923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3924 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3926 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3927 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3930 3931 /* explictly compute R^-T */ 3932 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3933 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3934 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3935 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3937 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3938 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3939 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3940 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3942 3943 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3949 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3951 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3952 3953 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3954 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3955 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3960 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3962 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3963 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3964 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3965 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3966 3967 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3968 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3969 /* insert cols for primal dofs */ 3970 for (j=0;j<primal_dofs;j++) { 3971 start_vals = &qr_basis[j*size_of_constraint]; 3972 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3973 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3974 } 3975 /* insert cols for dual dofs */ 3976 for (j=0,k=0;j<dual_dofs;k++) { 3977 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3978 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3979 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3980 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3981 j++; 3982 } 3983 } 3984 3985 /* check change of basis */ 3986 if (pcbddc->dbg_flag) { 3987 PetscInt ii,jj; 3988 PetscBool valid_qr=PETSC_TRUE; 3989 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3996 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3997 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3998 for (jj=0;jj<size_of_constraint;jj++) { 3999 for (ii=0;ii<primal_dofs;ii++) { 4000 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4001 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4002 } 4003 } 4004 if (!valid_qr) { 4005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4006 for (jj=0;jj<size_of_constraint;jj++) { 4007 for (ii=0;ii<primal_dofs;ii++) { 4008 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4009 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4010 } 4011 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4012 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4013 } 4014 } 4015 } 4016 } else { 4017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4018 } 4019 } 4020 } else { /* simple transformation block */ 4021 PetscInt row,col; 4022 PetscScalar val,norm; 4023 4024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4025 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4026 for (j=0;j<size_of_constraint;j++) { 4027 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4028 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4029 if (!PetscBTLookup(is_primal,row_B)) { 4030 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4031 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4032 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4033 } else { 4034 for (k=0;k<size_of_constraint;k++) { 4035 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4036 if (row != col) { 4037 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4038 } else { 4039 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4040 } 4041 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4042 } 4043 } 4044 } 4045 if (pcbddc->dbg_flag) { 4046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4047 } 4048 } 4049 } else { 4050 if (pcbddc->dbg_flag) { 4051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4052 } 4053 } 4054 } 4055 4056 /* free workspace */ 4057 if (qr_needed) { 4058 if (pcbddc->dbg_flag) { 4059 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4060 } 4061 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4062 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4063 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4064 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4065 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4066 } 4067 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4068 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4069 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4070 4071 /* assembling of global change of variable */ 4072 if (!pcbddc->fake_change) { 4073 Mat tmat; 4074 PetscInt bs; 4075 4076 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4077 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4078 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4079 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4080 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4081 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4082 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4083 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4084 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4085 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4086 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4087 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4088 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4089 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4090 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4091 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4093 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4094 4095 /* check */ 4096 if (pcbddc->dbg_flag) { 4097 PetscReal error; 4098 Vec x,x_change; 4099 4100 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4101 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4102 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4103 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4104 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4105 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4106 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4107 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4108 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4109 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4110 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4111 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4114 ierr = VecDestroy(&x);CHKERRQ(ierr); 4115 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4116 } 4117 } 4118 4119 /* adapt sub_schurs computed (if any) */ 4120 if (pcbddc->use_deluxe_scaling) { 4121 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4122 if (sub_schurs->S_Ej_all) { 4123 Mat S_new,tmat; 4124 ISLocalToGlobalMapping NtoSall; 4125 IS is_all_N,is_V,is_V_Sall; 4126 const PetscScalar *array; 4127 const PetscInt *idxs_V,*idxs_all; 4128 PetscInt i,n_V; 4129 4130 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4131 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4132 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4133 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4134 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4135 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4136 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4137 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4138 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4139 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4140 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4141 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4142 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4143 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4144 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4145 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4146 for (i=0;i<n_V;i++) { 4147 PetscScalar val; 4148 PetscInt idx; 4149 4150 idx = idxs_V[i]; 4151 val = array[idxs_all[idxs_V[i]]]; 4152 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4153 } 4154 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 sub_schurs->S_Ej_all = S_new; 4157 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4158 if (sub_schurs->sum_S_Ej_all) { 4159 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4160 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4161 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4162 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4163 sub_schurs->sum_S_Ej_all = S_new; 4164 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4165 } 4166 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4167 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4168 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4169 if (!pcbddc->fake_change) { 4170 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4171 } else { 4172 ierr = ISDestroy(&sub_schurs->change_primal);CHKERRQ(ierr); 4173 sub_schurs->change_primal = is_V_Sall; 4174 ierr = KSPDestroy(&sub_schurs->change);CHKERRQ(ierr); 4175 ierr = KSPCreate(PETSC_COMM_SELF,&sub_schurs->change);CHKERRQ(ierr); 4176 ierr = KSPSetOperators(sub_schurs->change,tmat,tmat);CHKERRQ(ierr); 4177 } 4178 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4179 } 4180 } 4181 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4182 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4183 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4184 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4185 } 4186 4187 /* set up change of basis context */ 4188 if (pcbddc->ChangeOfBasisMatrix) { 4189 PCBDDCChange_ctx change_ctx; 4190 4191 if (!pcbddc->new_global_mat) { 4192 PetscInt global_size,local_size; 4193 4194 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4195 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4196 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4197 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4198 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4199 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4200 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4201 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4202 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4203 } else { 4204 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4205 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4206 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4207 } 4208 if (!pcbddc->user_ChangeOfBasisMatrix) { 4209 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4210 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4211 } else { 4212 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4213 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4214 } 4215 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4216 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4217 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4219 } else { 4220 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4221 } 4222 4223 /* add pressure dofs to set of primal nodes for numbering purposes */ 4224 for (i=0;i<pcbddc->benign_n;i++) { 4225 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4226 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4227 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4228 pcbddc->local_primal_size_cc++; 4229 pcbddc->local_primal_size++; 4230 } 4231 4232 /* check if a new primal space has been introduced (also take into account benign trick) */ 4233 pcbddc->new_primal_space_local = PETSC_TRUE; 4234 if (olocal_primal_size == pcbddc->local_primal_size) { 4235 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4236 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4237 if (!pcbddc->new_primal_space_local) { 4238 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4239 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4240 } 4241 } 4242 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4243 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4244 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4245 4246 /* flush dbg viewer */ 4247 if (pcbddc->dbg_flag) { 4248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4249 } 4250 4251 /* free workspace */ 4252 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4253 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4254 if (!pcbddc->adaptive_selection) { 4255 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4256 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4257 } else { 4258 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4259 pcbddc->adaptive_constraints_idxs_ptr, 4260 pcbddc->adaptive_constraints_data_ptr, 4261 pcbddc->adaptive_constraints_idxs, 4262 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4263 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4264 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4265 } 4266 PetscFunctionReturn(0); 4267 } 4268 4269 #undef __FUNCT__ 4270 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4271 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4272 { 4273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4274 PC_IS *pcis = (PC_IS*)pc->data; 4275 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4276 PetscInt ierr,i,vertex_size,N; 4277 PetscViewer viewer=pcbddc->dbg_viewer; 4278 4279 PetscFunctionBegin; 4280 /* Reset previously computed graph */ 4281 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4282 /* Init local Graph struct */ 4283 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4284 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4285 4286 /* Check validity of the csr graph passed in by the user */ 4287 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4288 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4289 } 4290 4291 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4292 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4293 PetscInt *xadj,*adjncy; 4294 PetscInt nvtxs; 4295 PetscBool flg_row=PETSC_FALSE; 4296 4297 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4298 if (flg_row) { 4299 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4300 pcbddc->computed_rowadj = PETSC_TRUE; 4301 } 4302 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4303 } 4304 if (pcbddc->dbg_flag) { 4305 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4306 } 4307 4308 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4309 vertex_size = 1; 4310 if (pcbddc->user_provided_isfordofs) { 4311 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4312 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4313 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4314 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4315 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4316 } 4317 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4318 pcbddc->n_ISForDofs = 0; 4319 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4320 } 4321 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4322 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4323 } else { 4324 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4325 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4326 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4327 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4328 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4329 } 4330 } 4331 } 4332 4333 /* Setup of Graph */ 4334 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4335 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4336 } 4337 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4338 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4339 } 4340 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4341 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4342 } 4343 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4344 4345 /* attach info on disconnected subdomains if present */ 4346 if (pcbddc->n_local_subs) { 4347 PetscInt *local_subs; 4348 4349 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4350 for (i=0;i<pcbddc->n_local_subs;i++) { 4351 const PetscInt *idxs; 4352 PetscInt nl,j; 4353 4354 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4355 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4356 for (j=0;j<nl;j++) { 4357 local_subs[idxs[j]] = i; 4358 } 4359 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4360 } 4361 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4362 pcbddc->mat_graph->local_subs = local_subs; 4363 } 4364 4365 /* Graph's connected components analysis */ 4366 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4367 4368 /* print some info to stdout */ 4369 if (pcbddc->dbg_flag) { 4370 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4371 } 4372 4373 /* mark topography has done */ 4374 pcbddc->recompute_topography = PETSC_FALSE; 4375 PetscFunctionReturn(0); 4376 } 4377 4378 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4379 #undef __FUNCT__ 4380 #define __FUNCT__ "PCBDDCSubsetNumbering" 4381 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4382 { 4383 PetscSF sf; 4384 PetscLayout map; 4385 const PetscInt *idxs; 4386 PetscInt *leaf_data,*root_data,*gidxs; 4387 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4388 PetscInt n_n,nlocals,start,first_index; 4389 PetscMPIInt commsize; 4390 PetscBool first_found; 4391 PetscErrorCode ierr; 4392 4393 PetscFunctionBegin; 4394 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4395 if (subset_mult) { 4396 PetscCheckSameComm(subset,1,subset_mult,2); 4397 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4398 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4399 } 4400 /* create workspace layout for computing global indices of subset */ 4401 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4402 lbounds[0] = lbounds[1] = 0; 4403 for (i=0;i<n;i++) { 4404 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4405 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4406 } 4407 lbounds[0] = -lbounds[0]; 4408 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4409 gbounds[0] = -gbounds[0]; 4410 N = gbounds[1] - gbounds[0] + 1; 4411 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4412 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4413 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4414 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4415 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4416 4417 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4418 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4419 if (subset_mult) { 4420 const PetscInt* idxs_mult; 4421 4422 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4423 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4424 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4425 } else { 4426 for (i=0;i<n;i++) leaf_data[i] = 1; 4427 } 4428 /* local size of new subset */ 4429 n_n = 0; 4430 for (i=0;i<n;i++) n_n += leaf_data[i]; 4431 4432 /* global indexes in layout */ 4433 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4434 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4435 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4436 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4437 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4438 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4439 4440 /* reduce from leaves to roots */ 4441 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4442 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4443 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4444 4445 /* count indexes in local part of layout */ 4446 nlocals = 0; 4447 first_index = -1; 4448 first_found = PETSC_FALSE; 4449 for (i=0;i<Nl;i++) { 4450 if (!first_found && root_data[i]) { 4451 first_found = PETSC_TRUE; 4452 first_index = i; 4453 } 4454 nlocals += root_data[i]; 4455 } 4456 4457 /* cumulative of number of indexes and size of subset without holes */ 4458 #if defined(PETSC_HAVE_MPI_EXSCAN) 4459 start = 0; 4460 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4461 #else 4462 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4463 start = start-nlocals; 4464 #endif 4465 4466 if (N_n) { /* compute total size of new subset if requested */ 4467 *N_n = start + nlocals; 4468 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4469 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4470 } 4471 4472 /* adapt root data with cumulative */ 4473 if (first_found) { 4474 PetscInt old_index; 4475 4476 root_data[first_index] += start; 4477 old_index = first_index; 4478 for (i=first_index+1;i<Nl;i++) { 4479 if (root_data[i]) { 4480 root_data[i] += root_data[old_index]; 4481 old_index = i; 4482 } 4483 } 4484 } 4485 4486 /* from roots to leaves */ 4487 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4488 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4489 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4490 4491 /* create new IS with global indexes without holes */ 4492 if (subset_mult) { 4493 const PetscInt* idxs_mult; 4494 PetscInt cum; 4495 4496 cum = 0; 4497 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4498 for (i=0;i<n;i++) { 4499 PetscInt j; 4500 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4501 } 4502 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4503 } else { 4504 for (i=0;i<n;i++) { 4505 gidxs[i] = leaf_data[i]-1; 4506 } 4507 } 4508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4509 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4515 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4516 { 4517 PetscInt i,j; 4518 PetscScalar *alphas; 4519 PetscErrorCode ierr; 4520 4521 PetscFunctionBegin; 4522 /* this implements stabilized Gram-Schmidt */ 4523 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4524 for (i=0;i<n;i++) { 4525 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4526 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4527 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4528 } 4529 ierr = PetscFree(alphas);CHKERRQ(ierr); 4530 PetscFunctionReturn(0); 4531 } 4532 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatISGetSubassemblingPattern" 4535 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4536 { 4537 IS ranks_send_to; 4538 PetscInt n_neighs,*neighs,*n_shared,**shared; 4539 PetscMPIInt size,rank,color; 4540 PetscInt *xadj,*adjncy; 4541 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4542 PetscInt i,local_size,threshold=0; 4543 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4544 PetscSubcomm subcomm; 4545 PetscErrorCode ierr; 4546 4547 PetscFunctionBegin; 4548 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4549 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4550 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4551 4552 /* Get info on mapping */ 4553 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4554 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4555 4556 /* build local CSR graph of subdomains' connectivity */ 4557 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4558 xadj[0] = 0; 4559 xadj[1] = PetscMax(n_neighs-1,0); 4560 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4561 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4562 4563 if (threshold) { 4564 PetscInt xadj_count = 0; 4565 for (i=1;i<n_neighs;i++) { 4566 if (n_shared[i] > threshold) { 4567 adjncy[xadj_count] = neighs[i]; 4568 adjncy_wgt[xadj_count] = n_shared[i]; 4569 xadj_count++; 4570 } 4571 } 4572 xadj[1] = xadj_count; 4573 } else { 4574 if (xadj[1]) { 4575 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4576 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4577 } 4578 } 4579 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4580 if (use_square) { 4581 for (i=0;i<xadj[1];i++) { 4582 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4583 } 4584 } 4585 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4586 4587 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4588 4589 /* 4590 Restrict work on active processes only. 4591 */ 4592 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4593 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4594 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4595 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4596 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4597 if (color) { 4598 ierr = PetscFree(xadj);CHKERRQ(ierr); 4599 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4600 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4601 } else { 4602 Mat subdomain_adj; 4603 IS new_ranks,new_ranks_contig; 4604 MatPartitioning partitioner; 4605 PetscInt prank,rstart=0,rend=0; 4606 PetscInt *is_indices,*oldranks; 4607 PetscBool aggregate; 4608 4609 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4610 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4611 prank = rank; 4612 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4613 /* 4614 for (i=0;i<size;i++) { 4615 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4616 } 4617 */ 4618 for (i=0;i<xadj[1];i++) { 4619 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4620 } 4621 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4622 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4623 if (aggregate) { 4624 PetscInt lrows,row,ncols,*cols; 4625 PetscMPIInt nrank; 4626 PetscScalar *vals; 4627 4628 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4629 lrows = 0; 4630 if (nrank<redprocs) { 4631 lrows = size/redprocs; 4632 if (nrank<size%redprocs) lrows++; 4633 } 4634 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4635 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4636 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4637 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4638 row = nrank; 4639 ncols = xadj[1]-xadj[0]; 4640 cols = adjncy; 4641 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4642 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4643 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4644 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4645 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4646 ierr = PetscFree(xadj);CHKERRQ(ierr); 4647 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4648 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4649 ierr = PetscFree(vals);CHKERRQ(ierr); 4650 } else { 4651 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4652 } 4653 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4654 4655 /* Partition */ 4656 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4657 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4658 if (use_vwgt) { 4659 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4660 v_wgt[0] = local_size; 4661 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4662 } 4663 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4664 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4665 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4666 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4667 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4668 4669 /* renumber new_ranks to avoid "holes" in new set of processors */ 4670 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4671 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4672 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4673 if (!redprocs) { 4674 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4675 } else { 4676 PetscInt idxs[1]; 4677 PetscMPIInt tag; 4678 MPI_Request *reqs; 4679 4680 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4681 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4682 for (i=rstart;i<rend;i++) { 4683 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4684 } 4685 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4686 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4687 ierr = PetscFree(reqs);CHKERRQ(ierr); 4688 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4689 } 4690 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4691 /* clean up */ 4692 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4693 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4694 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4695 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4696 } 4697 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4698 4699 /* assemble parallel IS for sends */ 4700 i = 1; 4701 if (color) i=0; 4702 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4703 /* get back IS */ 4704 *is_sends = ranks_send_to; 4705 PetscFunctionReturn(0); 4706 } 4707 4708 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "MatISSubassemble" 4712 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4713 { 4714 Mat local_mat; 4715 IS is_sends_internal; 4716 PetscInt rows,cols,new_local_rows; 4717 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4718 PetscBool ismatis,isdense,newisdense,destroy_mat; 4719 ISLocalToGlobalMapping l2gmap; 4720 PetscInt* l2gmap_indices; 4721 const PetscInt* is_indices; 4722 MatType new_local_type; 4723 /* buffers */ 4724 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4725 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4726 PetscInt *recv_buffer_idxs_local; 4727 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4728 /* MPI */ 4729 MPI_Comm comm,comm_n; 4730 PetscSubcomm subcomm; 4731 PetscMPIInt n_sends,n_recvs,commsize; 4732 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4733 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4734 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4735 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4736 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4737 PetscErrorCode ierr; 4738 4739 PetscFunctionBegin; 4740 /* TODO: add missing checks */ 4741 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4742 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4743 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4744 PetscValidLogicalCollectiveInt(mat,nis,7); 4745 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4746 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4747 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4748 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4749 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4750 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4751 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4752 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4753 PetscInt mrows,mcols,mnrows,mncols; 4754 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4755 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4756 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4757 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4758 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4759 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4760 } 4761 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4762 PetscValidLogicalCollectiveInt(mat,bs,0); 4763 /* prepare IS for sending if not provided */ 4764 if (!is_sends) { 4765 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4766 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4767 } else { 4768 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4769 is_sends_internal = is_sends; 4770 } 4771 4772 /* get comm */ 4773 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4774 4775 /* compute number of sends */ 4776 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4777 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4778 4779 /* compute number of receives */ 4780 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4781 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4782 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4783 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4784 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4785 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4786 ierr = PetscFree(iflags);CHKERRQ(ierr); 4787 4788 /* restrict comm if requested */ 4789 subcomm = 0; 4790 destroy_mat = PETSC_FALSE; 4791 if (restrict_comm) { 4792 PetscMPIInt color,subcommsize; 4793 4794 color = 0; 4795 if (restrict_full) { 4796 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4797 } else { 4798 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4799 } 4800 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4801 subcommsize = commsize - subcommsize; 4802 /* check if reuse has been requested */ 4803 if (reuse == MAT_REUSE_MATRIX) { 4804 if (*mat_n) { 4805 PetscMPIInt subcommsize2; 4806 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4807 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4808 comm_n = PetscObjectComm((PetscObject)*mat_n); 4809 } else { 4810 comm_n = PETSC_COMM_SELF; 4811 } 4812 } else { /* MAT_INITIAL_MATRIX */ 4813 PetscMPIInt rank; 4814 4815 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4816 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4817 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4818 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4819 comm_n = PetscSubcommChild(subcomm); 4820 } 4821 /* flag to destroy *mat_n if not significative */ 4822 if (color) destroy_mat = PETSC_TRUE; 4823 } else { 4824 comm_n = comm; 4825 } 4826 4827 /* prepare send/receive buffers */ 4828 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4829 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4830 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4831 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4832 if (nis) { 4833 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4834 } 4835 4836 /* Get data from local matrices */ 4837 if (!isdense) { 4838 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4839 /* TODO: See below some guidelines on how to prepare the local buffers */ 4840 /* 4841 send_buffer_vals should contain the raw values of the local matrix 4842 send_buffer_idxs should contain: 4843 - MatType_PRIVATE type 4844 - PetscInt size_of_l2gmap 4845 - PetscInt global_row_indices[size_of_l2gmap] 4846 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4847 */ 4848 } else { 4849 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4850 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4851 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4852 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4853 send_buffer_idxs[1] = i; 4854 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4855 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4856 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4857 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4858 for (i=0;i<n_sends;i++) { 4859 ilengths_vals[is_indices[i]] = len*len; 4860 ilengths_idxs[is_indices[i]] = len+2; 4861 } 4862 } 4863 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4864 /* additional is (if any) */ 4865 if (nis) { 4866 PetscMPIInt psum; 4867 PetscInt j; 4868 for (j=0,psum=0;j<nis;j++) { 4869 PetscInt plen; 4870 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4871 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4872 psum += len+1; /* indices + lenght */ 4873 } 4874 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4875 for (j=0,psum=0;j<nis;j++) { 4876 PetscInt plen; 4877 const PetscInt *is_array_idxs; 4878 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4879 send_buffer_idxs_is[psum] = plen; 4880 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4881 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4882 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4883 psum += plen+1; /* indices + lenght */ 4884 } 4885 for (i=0;i<n_sends;i++) { 4886 ilengths_idxs_is[is_indices[i]] = psum; 4887 } 4888 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4889 } 4890 4891 buf_size_idxs = 0; 4892 buf_size_vals = 0; 4893 buf_size_idxs_is = 0; 4894 for (i=0;i<n_recvs;i++) { 4895 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4896 buf_size_vals += (PetscInt)olengths_vals[i]; 4897 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4898 } 4899 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4900 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4901 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4902 4903 /* get new tags for clean communications */ 4904 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4905 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4906 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4907 4908 /* allocate for requests */ 4909 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4910 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4911 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4912 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4913 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4914 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4915 4916 /* communications */ 4917 ptr_idxs = recv_buffer_idxs; 4918 ptr_vals = recv_buffer_vals; 4919 ptr_idxs_is = recv_buffer_idxs_is; 4920 for (i=0;i<n_recvs;i++) { 4921 source_dest = onodes[i]; 4922 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4923 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4924 ptr_idxs += olengths_idxs[i]; 4925 ptr_vals += olengths_vals[i]; 4926 if (nis) { 4927 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4928 ptr_idxs_is += olengths_idxs_is[i]; 4929 } 4930 } 4931 for (i=0;i<n_sends;i++) { 4932 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4933 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4934 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4935 if (nis) { 4936 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4937 } 4938 } 4939 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4940 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4941 4942 /* assemble new l2g map */ 4943 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4944 ptr_idxs = recv_buffer_idxs; 4945 new_local_rows = 0; 4946 for (i=0;i<n_recvs;i++) { 4947 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4948 ptr_idxs += olengths_idxs[i]; 4949 } 4950 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4951 ptr_idxs = recv_buffer_idxs; 4952 new_local_rows = 0; 4953 for (i=0;i<n_recvs;i++) { 4954 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4955 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4956 ptr_idxs += olengths_idxs[i]; 4957 } 4958 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4959 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4960 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4961 4962 /* infer new local matrix type from received local matrices type */ 4963 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4964 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4965 if (n_recvs) { 4966 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4967 ptr_idxs = recv_buffer_idxs; 4968 for (i=0;i<n_recvs;i++) { 4969 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4970 new_local_type_private = MATAIJ_PRIVATE; 4971 break; 4972 } 4973 ptr_idxs += olengths_idxs[i]; 4974 } 4975 switch (new_local_type_private) { 4976 case MATDENSE_PRIVATE: 4977 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4978 new_local_type = MATSEQAIJ; 4979 bs = 1; 4980 } else { /* if I receive only 1 dense matrix */ 4981 new_local_type = MATSEQDENSE; 4982 bs = 1; 4983 } 4984 break; 4985 case MATAIJ_PRIVATE: 4986 new_local_type = MATSEQAIJ; 4987 bs = 1; 4988 break; 4989 case MATBAIJ_PRIVATE: 4990 new_local_type = MATSEQBAIJ; 4991 break; 4992 case MATSBAIJ_PRIVATE: 4993 new_local_type = MATSEQSBAIJ; 4994 break; 4995 default: 4996 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4997 break; 4998 } 4999 } else { /* by default, new_local_type is seqdense */ 5000 new_local_type = MATSEQDENSE; 5001 bs = 1; 5002 } 5003 5004 /* create MATIS object if needed */ 5005 if (reuse == MAT_INITIAL_MATRIX) { 5006 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5007 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5008 } else { 5009 /* it also destroys the local matrices */ 5010 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5011 } 5012 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5013 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5014 5015 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5016 5017 /* Global to local map of received indices */ 5018 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5019 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5020 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5021 5022 /* restore attributes -> type of incoming data and its size */ 5023 buf_size_idxs = 0; 5024 for (i=0;i<n_recvs;i++) { 5025 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5026 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5027 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5028 } 5029 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5030 5031 /* set preallocation */ 5032 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5033 if (!newisdense) { 5034 PetscInt *new_local_nnz=0; 5035 5036 ptr_vals = recv_buffer_vals; 5037 ptr_idxs = recv_buffer_idxs_local; 5038 if (n_recvs) { 5039 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5040 } 5041 for (i=0;i<n_recvs;i++) { 5042 PetscInt j; 5043 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5044 for (j=0;j<*(ptr_idxs+1);j++) { 5045 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5046 } 5047 } else { 5048 /* TODO */ 5049 } 5050 ptr_idxs += olengths_idxs[i]; 5051 } 5052 if (new_local_nnz) { 5053 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5054 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5055 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5056 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5057 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5058 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5059 } else { 5060 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5061 } 5062 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5063 } else { 5064 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5065 } 5066 5067 /* set values */ 5068 ptr_vals = recv_buffer_vals; 5069 ptr_idxs = recv_buffer_idxs_local; 5070 for (i=0;i<n_recvs;i++) { 5071 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5072 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5073 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5074 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5075 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5076 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5077 } else { 5078 /* TODO */ 5079 } 5080 ptr_idxs += olengths_idxs[i]; 5081 ptr_vals += olengths_vals[i]; 5082 } 5083 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5084 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5085 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5086 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5087 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5088 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5089 5090 #if 0 5091 if (!restrict_comm) { /* check */ 5092 Vec lvec,rvec; 5093 PetscReal infty_error; 5094 5095 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5096 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5097 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5098 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5099 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5100 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5101 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5102 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5103 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5104 } 5105 #endif 5106 5107 /* assemble new additional is (if any) */ 5108 if (nis) { 5109 PetscInt **temp_idxs,*count_is,j,psum; 5110 5111 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5112 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5113 ptr_idxs = recv_buffer_idxs_is; 5114 psum = 0; 5115 for (i=0;i<n_recvs;i++) { 5116 for (j=0;j<nis;j++) { 5117 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5118 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5119 psum += plen; 5120 ptr_idxs += plen+1; /* shift pointer to received data */ 5121 } 5122 } 5123 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5124 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5125 for (i=1;i<nis;i++) { 5126 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5127 } 5128 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5129 ptr_idxs = recv_buffer_idxs_is; 5130 for (i=0;i<n_recvs;i++) { 5131 for (j=0;j<nis;j++) { 5132 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5133 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5134 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5135 ptr_idxs += plen+1; /* shift pointer to received data */ 5136 } 5137 } 5138 for (i=0;i<nis;i++) { 5139 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5140 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5141 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5142 } 5143 ierr = PetscFree(count_is);CHKERRQ(ierr); 5144 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5145 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5146 } 5147 /* free workspace */ 5148 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5149 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5150 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5151 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5152 if (isdense) { 5153 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5154 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5155 } else { 5156 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5157 } 5158 if (nis) { 5159 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5160 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5161 } 5162 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5163 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5164 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5165 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5166 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5167 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5168 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5169 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5170 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5171 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5172 ierr = PetscFree(onodes);CHKERRQ(ierr); 5173 if (nis) { 5174 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5175 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5176 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5177 } 5178 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5179 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5180 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5181 for (i=0;i<nis;i++) { 5182 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5183 } 5184 *mat_n = NULL; 5185 } 5186 PetscFunctionReturn(0); 5187 } 5188 5189 /* temporary hack into ksp private data structure */ 5190 #include <petsc/private/kspimpl.h> 5191 5192 #undef __FUNCT__ 5193 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5194 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5195 { 5196 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5197 PC_IS *pcis = (PC_IS*)pc->data; 5198 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5199 MatNullSpace CoarseNullSpace=NULL; 5200 ISLocalToGlobalMapping coarse_islg; 5201 IS coarse_is,*isarray; 5202 PetscInt i,im_active=-1,active_procs=-1; 5203 PetscInt nis,nisdofs,nisneu,nisvert; 5204 PC pc_temp; 5205 PCType coarse_pc_type; 5206 KSPType coarse_ksp_type; 5207 PetscBool multilevel_requested,multilevel_allowed; 5208 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5209 Mat t_coarse_mat_is; 5210 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5211 PetscMPIInt all_procs; 5212 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5213 PetscBool compute_vecs = PETSC_FALSE; 5214 PetscScalar *array; 5215 PetscErrorCode ierr; 5216 5217 PetscFunctionBegin; 5218 /* Assign global numbering to coarse dofs */ 5219 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5220 PetscInt ocoarse_size; 5221 compute_vecs = PETSC_TRUE; 5222 ocoarse_size = pcbddc->coarse_size; 5223 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5224 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5225 /* see if we can avoid some work */ 5226 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5227 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5228 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5229 PC pc; 5230 PetscBool isbddc; 5231 5232 /* temporary workaround since PCBDDC does not have a reset method so far */ 5233 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5234 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5235 if (isbddc) { 5236 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5237 } else { 5238 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5239 } 5240 coarse_reuse = PETSC_FALSE; 5241 } else { /* we can safely reuse already computed coarse matrix */ 5242 coarse_reuse = PETSC_TRUE; 5243 } 5244 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5245 coarse_reuse = PETSC_FALSE; 5246 } 5247 /* reset any subassembling information */ 5248 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5249 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5250 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5251 coarse_reuse = PETSC_TRUE; 5252 } 5253 5254 /* count "active" (i.e. with positive local size) and "void" processes */ 5255 im_active = !!(pcis->n); 5256 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5257 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5258 void_procs = all_procs-active_procs; 5259 csin_type_simple = PETSC_TRUE; 5260 redist = PETSC_FALSE; 5261 if (pcbddc->current_level && void_procs) { 5262 csin_ml = PETSC_TRUE; 5263 ncoarse_ml = void_procs; 5264 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5265 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5266 csin_ds = PETSC_TRUE; 5267 ncoarse_ds = pcbddc->redistribute_coarse; 5268 redist = PETSC_TRUE; 5269 } else { 5270 csin_ds = PETSC_TRUE; 5271 ncoarse_ds = active_procs; 5272 redist = PETSC_TRUE; 5273 } 5274 } else { 5275 csin_ml = PETSC_FALSE; 5276 ncoarse_ml = all_procs; 5277 if (void_procs) { 5278 csin_ds = PETSC_TRUE; 5279 ncoarse_ds = void_procs; 5280 csin_type_simple = PETSC_FALSE; 5281 } else { 5282 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5283 csin_ds = PETSC_TRUE; 5284 ncoarse_ds = pcbddc->redistribute_coarse; 5285 redist = PETSC_TRUE; 5286 } else { 5287 csin_ds = PETSC_FALSE; 5288 ncoarse_ds = all_procs; 5289 } 5290 } 5291 } 5292 5293 /* 5294 test if we can go multilevel: three conditions must be satisfied: 5295 - we have not exceeded the number of levels requested 5296 - we can actually subassemble the active processes 5297 - we can find a suitable number of MPI processes where we can place the subassembled problem 5298 */ 5299 multilevel_allowed = PETSC_FALSE; 5300 multilevel_requested = PETSC_FALSE; 5301 if (pcbddc->current_level < pcbddc->max_levels) { 5302 multilevel_requested = PETSC_TRUE; 5303 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5304 multilevel_allowed = PETSC_FALSE; 5305 } else { 5306 multilevel_allowed = PETSC_TRUE; 5307 } 5308 } 5309 /* determine number of process partecipating to coarse solver */ 5310 if (multilevel_allowed) { 5311 ncoarse = ncoarse_ml; 5312 csin = csin_ml; 5313 redist = PETSC_FALSE; 5314 } else { 5315 ncoarse = ncoarse_ds; 5316 csin = csin_ds; 5317 } 5318 5319 /* creates temporary l2gmap and IS for coarse indexes */ 5320 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5321 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5322 5323 /* creates temporary MATIS object for coarse matrix */ 5324 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5325 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5326 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5327 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5328 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5329 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5330 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5331 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5332 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5333 5334 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5335 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5336 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5337 const PetscInt *idxs; 5338 ISLocalToGlobalMapping tmap; 5339 5340 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5341 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5342 /* allocate space for temporary storage */ 5343 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5344 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5345 /* allocate for IS array */ 5346 nisdofs = pcbddc->n_ISForDofsLocal; 5347 nisneu = !!pcbddc->NeumannBoundariesLocal; 5348 nisvert = 0; 5349 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5350 nisvert = 1; 5351 } 5352 nis = nisdofs + nisneu + nisvert; 5353 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5354 /* dofs splitting */ 5355 for (i=0;i<nisdofs;i++) { 5356 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5357 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5358 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5359 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5360 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5361 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5362 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5363 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5364 } 5365 /* neumann boundaries */ 5366 if (pcbddc->NeumannBoundariesLocal) { 5367 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5368 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5369 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5370 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5371 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5372 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5374 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5375 } 5376 /* primal vertices (benign) */ 5377 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5378 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5379 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5380 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5381 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5382 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5384 } 5385 /* free memory */ 5386 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5387 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5388 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5389 } else { 5390 nis = 0; 5391 nisdofs = 0; 5392 nisneu = 0; 5393 nisvert = 0; 5394 isarray = NULL; 5395 } 5396 /* destroy no longer needed map */ 5397 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5398 5399 /* restrict on coarse candidates (if needed) */ 5400 coarse_mat_is = NULL; 5401 if (csin) { 5402 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5403 if (redist) { 5404 PetscMPIInt rank; 5405 PetscInt spc,n_spc_p1,dest[1],destsize; 5406 5407 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5408 spc = active_procs/ncoarse; 5409 n_spc_p1 = active_procs%ncoarse; 5410 if (im_active) { 5411 destsize = 1; 5412 if (rank > n_spc_p1*(spc+1)-1) { 5413 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5414 } else { 5415 dest[0] = rank/(spc+1); 5416 } 5417 } else { 5418 destsize = 0; 5419 } 5420 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5421 } else if (csin_type_simple) { 5422 PetscMPIInt rank; 5423 PetscInt issize,isidx; 5424 5425 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5426 if (im_active) { 5427 issize = 1; 5428 isidx = (PetscInt)rank; 5429 } else { 5430 issize = 0; 5431 isidx = -1; 5432 } 5433 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5434 } else { /* get a suitable subassembling pattern from MATIS code */ 5435 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5436 } 5437 5438 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5439 if (!redist || ncoarse <= void_procs) { 5440 PetscInt ncoarse_cand,tissize,*nisindices; 5441 PetscInt *coarse_candidates; 5442 const PetscInt* tisindices; 5443 5444 /* get coarse candidates' ranks in pc communicator */ 5445 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5446 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5447 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5448 if (!coarse_candidates[i]) { 5449 coarse_candidates[ncoarse_cand++]=i; 5450 } 5451 } 5452 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5453 5454 5455 if (pcbddc->dbg_flag) { 5456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5457 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5458 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5459 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5460 for (i=0;i<ncoarse_cand;i++) { 5461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5462 } 5463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5464 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5465 } 5466 /* shift the pattern on coarse candidates */ 5467 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5468 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5469 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5470 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5471 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5472 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5473 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5474 } 5475 if (pcbddc->dbg_flag) { 5476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5478 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5480 } 5481 } 5482 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5483 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5484 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5485 } else { /* this is the last level, so use just receiving processes in subcomm */ 5486 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5487 } 5488 } else { 5489 if (pcbddc->dbg_flag) { 5490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5491 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5493 } 5494 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5495 coarse_mat_is = t_coarse_mat_is; 5496 } 5497 5498 /* create local to global scatters for coarse problem */ 5499 if (compute_vecs) { 5500 PetscInt lrows; 5501 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5502 if (coarse_mat_is) { 5503 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5504 } else { 5505 lrows = 0; 5506 } 5507 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5508 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5509 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5510 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5511 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5512 } 5513 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5514 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5515 5516 /* set defaults for coarse KSP and PC */ 5517 if (multilevel_allowed) { 5518 coarse_ksp_type = KSPRICHARDSON; 5519 coarse_pc_type = PCBDDC; 5520 } else { 5521 coarse_ksp_type = KSPPREONLY; 5522 coarse_pc_type = PCREDUNDANT; 5523 } 5524 5525 /* print some info if requested */ 5526 if (pcbddc->dbg_flag) { 5527 if (!multilevel_allowed) { 5528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5529 if (multilevel_requested) { 5530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5531 } else if (pcbddc->max_levels) { 5532 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5533 } 5534 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5535 } 5536 } 5537 5538 /* create the coarse KSP object only once with defaults */ 5539 if (coarse_mat_is) { 5540 MatReuse coarse_mat_reuse; 5541 PetscViewer dbg_viewer = NULL; 5542 if (pcbddc->dbg_flag) { 5543 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5544 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5545 } 5546 if (!pcbddc->coarse_ksp) { 5547 char prefix[256],str_level[16]; 5548 size_t len; 5549 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5550 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5551 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5552 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5553 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5554 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5555 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5556 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5557 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5558 /* prefix */ 5559 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5560 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5561 if (!pcbddc->current_level) { 5562 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5563 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5564 } else { 5565 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5566 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5567 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5568 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5569 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5570 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5571 } 5572 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5573 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5574 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5575 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5576 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5577 /* allow user customization */ 5578 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5579 } 5580 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5581 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5582 if (nisdofs) { 5583 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5584 for (i=0;i<nisdofs;i++) { 5585 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5586 } 5587 } 5588 if (nisneu) { 5589 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5590 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5591 } 5592 if (nisvert) { 5593 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5594 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5595 } 5596 5597 /* get some info after set from options */ 5598 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5599 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5600 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5601 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5602 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5603 isbddc = PETSC_FALSE; 5604 } 5605 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5606 if (isredundant) { 5607 KSP inner_ksp; 5608 PC inner_pc; 5609 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5610 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5611 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5612 } 5613 5614 /* assemble coarse matrix */ 5615 if (coarse_reuse) { 5616 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5617 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5618 coarse_mat_reuse = MAT_REUSE_MATRIX; 5619 } else { 5620 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5621 } 5622 if (isbddc || isnn) { 5623 if (isbddc) { /* currently there's no API for this */ 5624 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5625 pcbddc->detect_disconnected = PETSC_TRUE; 5626 } 5627 if (pcbddc->coarsening_ratio > 1) { 5628 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5629 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5630 if (pcbddc->dbg_flag) { 5631 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5632 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5633 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5634 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5635 } 5636 } 5637 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5638 } else { 5639 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5640 coarse_mat = coarse_mat_is; 5641 } 5642 } else { 5643 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5644 } 5645 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5646 5647 /* propagate symmetry info of coarse matrix */ 5648 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5649 if (pc->pmat->symmetric_set) { 5650 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5651 } 5652 if (pc->pmat->hermitian_set) { 5653 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5654 } 5655 if (pc->pmat->spd_set) { 5656 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5657 } 5658 /* set operators */ 5659 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5660 if (pcbddc->dbg_flag) { 5661 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5662 } 5663 } else { /* processes non partecipating to coarse solver (if any) */ 5664 coarse_mat = 0; 5665 } 5666 ierr = PetscFree(isarray);CHKERRQ(ierr); 5667 #if 0 5668 { 5669 PetscViewer viewer; 5670 char filename[256]; 5671 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5672 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5673 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5674 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5675 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5676 } 5677 #endif 5678 5679 /* Compute coarse null space (special handling by BDDC only) */ 5680 #if 0 5681 if (pcbddc->NullSpace) { 5682 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5683 } 5684 #endif 5685 /* hack */ 5686 if (pcbddc->coarse_ksp) { 5687 Vec crhs,csol; 5688 5689 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5690 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5691 if (!csol) { 5692 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5693 } 5694 if (!crhs) { 5695 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5696 } 5697 } 5698 5699 /* compute null space for coarse solver if the benign trick has been requested */ 5700 if (pcbddc->benign_null) { 5701 5702 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5703 for (i=0;i<pcbddc->benign_n;i++) { 5704 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5705 } 5706 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5707 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5708 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5709 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5710 if (coarse_mat) { 5711 Vec nullv; 5712 PetscScalar *array,*array2; 5713 PetscInt nl; 5714 5715 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5716 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5717 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5718 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5719 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5720 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5721 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5722 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5723 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5724 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5725 } 5726 } 5727 5728 if (pcbddc->coarse_ksp) { 5729 PetscBool ispreonly; 5730 5731 if (CoarseNullSpace) { 5732 PetscBool isnull; 5733 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5734 if (0) { 5735 if (isbddc && !pcbddc->benign_saddle_point) { 5736 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5737 } else { 5738 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5739 } 5740 } else { 5741 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5742 } 5743 } 5744 /* setup coarse ksp */ 5745 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5746 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5747 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5748 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5749 KSP check_ksp; 5750 KSPType check_ksp_type; 5751 PC check_pc; 5752 Vec check_vec,coarse_vec; 5753 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5754 PetscInt its; 5755 PetscBool compute_eigs; 5756 PetscReal *eigs_r,*eigs_c; 5757 PetscInt neigs; 5758 const char *prefix; 5759 5760 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5761 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5762 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5763 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5764 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5765 if (ispreonly) { 5766 check_ksp_type = KSPPREONLY; 5767 compute_eigs = PETSC_FALSE; 5768 } else { 5769 check_ksp_type = KSPGMRES; 5770 compute_eigs = PETSC_TRUE; 5771 } 5772 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5773 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5774 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5775 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5776 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5777 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5778 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5779 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5780 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5781 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5782 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5783 /* create random vec */ 5784 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5785 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5786 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5787 if (CoarseNullSpace) { 5788 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5789 } 5790 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5791 /* solve coarse problem */ 5792 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5793 if (CoarseNullSpace) { 5794 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5795 } 5796 /* set eigenvalue estimation if preonly has not been requested */ 5797 if (compute_eigs) { 5798 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5799 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5800 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5801 lambda_max = eigs_r[neigs-1]; 5802 lambda_min = eigs_r[0]; 5803 if (pcbddc->use_coarse_estimates) { 5804 if (lambda_max>lambda_min) { 5805 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5806 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5807 } 5808 } 5809 } 5810 5811 /* check coarse problem residual error */ 5812 if (pcbddc->dbg_flag) { 5813 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5814 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5815 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5816 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5817 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5818 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5819 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5820 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5821 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5822 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5823 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5824 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5825 if (CoarseNullSpace) { 5826 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5827 } 5828 if (compute_eigs) { 5829 PetscReal lambda_max_s,lambda_min_s; 5830 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5831 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5832 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5833 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5834 for (i=0;i<neigs;i++) { 5835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5836 } 5837 } 5838 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5839 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5840 } 5841 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5842 if (compute_eigs) { 5843 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5844 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5845 } 5846 } 5847 } 5848 /* print additional info */ 5849 if (pcbddc->dbg_flag) { 5850 /* waits until all processes reaches this point */ 5851 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5852 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5854 } 5855 5856 /* free memory */ 5857 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5858 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5859 PetscFunctionReturn(0); 5860 } 5861 5862 #undef __FUNCT__ 5863 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5864 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5865 { 5866 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5867 PC_IS* pcis = (PC_IS*)pc->data; 5868 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5869 IS subset,subset_mult,subset_n; 5870 PetscInt local_size,coarse_size=0; 5871 PetscInt *local_primal_indices=NULL; 5872 const PetscInt *t_local_primal_indices; 5873 PetscErrorCode ierr; 5874 5875 PetscFunctionBegin; 5876 /* Compute global number of coarse dofs */ 5877 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5878 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5879 } 5880 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5881 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5882 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5883 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5884 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5885 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5886 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5887 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5888 if (local_size != pcbddc->local_primal_size) { 5889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5890 } 5891 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5892 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5893 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5894 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5895 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5896 5897 /* check numbering */ 5898 if (pcbddc->dbg_flag) { 5899 PetscScalar coarsesum,*array,*array2; 5900 PetscInt i; 5901 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5902 5903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5905 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5906 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5907 /* counter */ 5908 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5909 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5910 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5911 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5912 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5913 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5914 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5915 for (i=0;i<pcbddc->local_primal_size;i++) { 5916 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5917 } 5918 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5919 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5920 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5921 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5922 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5923 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5924 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5925 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5926 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5927 for (i=0;i<pcis->n;i++) { 5928 if (array[i] != 0.0 && array[i] != array2[i]) { 5929 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5930 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5931 set_error = PETSC_TRUE; 5932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5933 } 5934 } 5935 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5936 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5938 for (i=0;i<pcis->n;i++) { 5939 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5940 } 5941 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5942 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5943 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5944 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5945 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5947 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5948 PetscInt *gidxs; 5949 5950 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5951 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5955 for (i=0;i<pcbddc->local_primal_size;i++) { 5956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5957 } 5958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5959 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5960 } 5961 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5962 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5963 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5964 } 5965 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5966 /* get back data */ 5967 *coarse_size_n = coarse_size; 5968 *local_primal_indices_n = local_primal_indices; 5969 PetscFunctionReturn(0); 5970 } 5971 5972 #undef __FUNCT__ 5973 #define __FUNCT__ "PCBDDCGlobalToLocal" 5974 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5975 { 5976 IS localis_t; 5977 PetscInt i,lsize,*idxs,n; 5978 PetscScalar *vals; 5979 PetscErrorCode ierr; 5980 5981 PetscFunctionBegin; 5982 /* get indices in local ordering exploiting local to global map */ 5983 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5984 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5985 for (i=0;i<lsize;i++) vals[i] = 1.0; 5986 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5987 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5988 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5989 if (idxs) { /* multilevel guard */ 5990 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5991 } 5992 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5993 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5994 ierr = PetscFree(vals);CHKERRQ(ierr); 5995 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5996 /* now compute set in local ordering */ 5997 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5998 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5999 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6000 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6001 for (i=0,lsize=0;i<n;i++) { 6002 if (PetscRealPart(vals[i]) > 0.5) { 6003 lsize++; 6004 } 6005 } 6006 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6007 for (i=0,lsize=0;i<n;i++) { 6008 if (PetscRealPart(vals[i]) > 0.5) { 6009 idxs[lsize++] = i; 6010 } 6011 } 6012 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6014 *localis = localis_t; 6015 PetscFunctionReturn(0); 6016 } 6017 6018 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6019 #undef __FUNCT__ 6020 #define __FUNCT__ "PCBDDCMatMult_Private" 6021 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6022 { 6023 PCBDDCChange_ctx change_ctx; 6024 PetscErrorCode ierr; 6025 6026 PetscFunctionBegin; 6027 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6028 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6029 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6030 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6031 PetscFunctionReturn(0); 6032 } 6033 6034 #undef __FUNCT__ 6035 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6036 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6037 { 6038 PCBDDCChange_ctx change_ctx; 6039 PetscErrorCode ierr; 6040 6041 PetscFunctionBegin; 6042 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6043 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6044 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6045 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6046 PetscFunctionReturn(0); 6047 } 6048 6049 #undef __FUNCT__ 6050 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6051 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6052 { 6053 PC_IS *pcis=(PC_IS*)pc->data; 6054 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6055 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6056 Mat S_j; 6057 PetscInt *used_xadj,*used_adjncy; 6058 PetscBool free_used_adj; 6059 PetscErrorCode ierr; 6060 6061 PetscFunctionBegin; 6062 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6063 free_used_adj = PETSC_FALSE; 6064 if (pcbddc->sub_schurs_layers == -1) { 6065 used_xadj = NULL; 6066 used_adjncy = NULL; 6067 } else { 6068 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6069 used_xadj = pcbddc->mat_graph->xadj; 6070 used_adjncy = pcbddc->mat_graph->adjncy; 6071 } else if (pcbddc->computed_rowadj) { 6072 used_xadj = pcbddc->mat_graph->xadj; 6073 used_adjncy = pcbddc->mat_graph->adjncy; 6074 } else { 6075 PetscBool flg_row=PETSC_FALSE; 6076 const PetscInt *xadj,*adjncy; 6077 PetscInt nvtxs; 6078 6079 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6080 if (flg_row) { 6081 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6082 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6083 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6084 free_used_adj = PETSC_TRUE; 6085 } else { 6086 pcbddc->sub_schurs_layers = -1; 6087 used_xadj = NULL; 6088 used_adjncy = NULL; 6089 } 6090 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6091 } 6092 } 6093 6094 /* setup sub_schurs data */ 6095 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6096 if (!sub_schurs->schur_explicit) { 6097 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6098 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6099 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6100 } else { 6101 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6102 PetscBool isseqaij; 6103 PetscInt benign_n; 6104 6105 if (!pcbddc->use_vertices && reuse_solvers) { 6106 PetscInt n_vertices; 6107 6108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6109 reuse_solvers = (PetscBool)!n_vertices; 6110 } 6111 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6112 if (!isseqaij) { 6113 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6114 if (matis->A == pcbddc->local_mat) { 6115 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6116 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6117 } else { 6118 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6119 } 6120 } 6121 if (!pcbddc->benign_change_explicit) { 6122 benign_n = pcbddc->benign_n; 6123 } else { 6124 benign_n = 0; 6125 } 6126 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6127 /* if the velocities are in the original basis, then we need to eliminate the no-net-flux from the S_Ej 6128 we assume that the user has passed in the associated quadrature weights in the near-null space attached to pc->pmat 6129 We need to compute the change of basis according to the quadrature weights, and this could not be done, at the moment, without some hacking */ 6130 if (pcbddc->benign_saddle_point && !pcbddc->use_change_of_basis && !pcbddc->user_ChangeOfBasis) { 6131 PC_IS *pcisf; 6132 PC_BDDC *pcbddcf; 6133 PC pcf; 6134 6135 if (pcbddc->sub_schurs_rebuild) { 6136 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6137 } 6138 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6139 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6140 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6141 /* hacks */ 6142 pcisf = (PC_IS*)pcf->data; 6143 pcbddcf = (PC_BDDC*)pcf->data; 6144 *pcisf = *pcis; 6145 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6146 pcbddcf->mat_graph = pcbddc->mat_graph; 6147 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6148 pcbddcf->sub_schurs = sub_schurs; 6149 pcbddcf->use_faces = PETSC_TRUE; 6150 pcbddcf->use_change_of_basis = PETSC_TRUE; 6151 pcbddcf->use_change_on_faces = PETSC_TRUE; 6152 pcbddcf->use_deluxe_scaling = PETSC_TRUE; 6153 pcbddcf->fake_change = PETSC_TRUE; 6154 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6155 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6156 pcbddcf->sub_schurs = NULL; 6157 ierr = MatDestroy(&pcbddcf->ConstraintMatrix);CHKERRQ(ierr); 6158 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6159 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6160 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6161 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6162 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6163 pcf->ops->destroy = NULL; 6164 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6165 } 6166 } 6167 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6168 6169 /* free adjacency */ 6170 if (free_used_adj) { 6171 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6172 } 6173 PetscFunctionReturn(0); 6174 } 6175 6176 #undef __FUNCT__ 6177 #define __FUNCT__ "PCBDDCInitSubSchurs" 6178 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6179 { 6180 PC_IS *pcis=(PC_IS*)pc->data; 6181 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6182 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6183 PCBDDCGraph graph; 6184 PetscErrorCode ierr; 6185 6186 PetscFunctionBegin; 6187 /* attach interface graph for determining subsets */ 6188 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6189 IS verticesIS,verticescomm; 6190 PetscInt vsize,*idxs; 6191 6192 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6193 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6194 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6195 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6196 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6197 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6198 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6199 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6200 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6201 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6202 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6203 /* 6204 if (pcbddc->dbg_flag) { 6205 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6206 } 6207 */ 6208 } else { 6209 graph = pcbddc->mat_graph; 6210 } 6211 6212 /* sub_schurs init */ 6213 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6214 6215 /* free graph struct */ 6216 if (pcbddc->sub_schurs_rebuild) { 6217 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6218 } 6219 PetscFunctionReturn(0); 6220 } 6221 6222 #undef __FUNCT__ 6223 #define __FUNCT__ "PCBDDCCheckOperator" 6224 PetscErrorCode PCBDDCCheckOperator(PC pc) 6225 { 6226 PC_IS *pcis=(PC_IS*)pc->data; 6227 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6228 PetscErrorCode ierr; 6229 6230 PetscFunctionBegin; 6231 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6232 IS zerodiag = NULL; 6233 Mat S_j,B0_B=NULL; 6234 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6235 PetscScalar *p0_check,*array,*array2; 6236 PetscReal norm; 6237 PetscInt i; 6238 6239 /* B0 and B0_B */ 6240 if (zerodiag) { 6241 IS dummy; 6242 6243 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6244 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6245 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6246 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6247 } 6248 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6249 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6250 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6251 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6252 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6253 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6254 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6255 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6256 /* S_j */ 6257 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6258 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6259 6260 /* mimic vector in \widetilde{W}_\Gamma */ 6261 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6262 /* continuous in primal space */ 6263 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6264 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6265 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6266 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6267 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6268 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6269 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6270 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6271 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6272 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6273 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6274 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6275 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6276 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6277 6278 /* assemble rhs for coarse problem */ 6279 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6280 /* local with Schur */ 6281 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6282 if (zerodiag) { 6283 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6284 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6285 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6286 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6287 } 6288 /* sum on primal nodes the local contributions */ 6289 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6290 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6291 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6292 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6293 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6294 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6295 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6296 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6297 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6298 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6299 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6300 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6301 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6302 /* scale primal nodes (BDDC sums contibutions) */ 6303 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6304 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6305 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6306 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6307 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6308 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6309 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6310 /* global: \widetilde{B0}_B w_\Gamma */ 6311 if (zerodiag) { 6312 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6313 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6314 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6315 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6316 } 6317 /* BDDC */ 6318 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6319 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6320 6321 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6322 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6323 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6324 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6325 for (i=0;i<pcbddc->benign_n;i++) { 6326 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6327 } 6328 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6329 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6330 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6331 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6332 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6333 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6334 } 6335 PetscFunctionReturn(0); 6336 } 6337