1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1001 } 1002 1003 if (mss) { /* multilevel */ 1004 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1006 } 1007 1008 thresh = pcbddc->adaptive_threshold; 1009 for (i=0;i<sub_schurs->n_subs;i++) { 1010 const PetscInt *idxs; 1011 PetscReal infty = PETSC_MAX_REAL; 1012 PetscInt j,subset_size,eigs_start = 0; 1013 PetscBLASInt B_N; 1014 PetscBool same_data = PETSC_FALSE; 1015 1016 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1017 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1018 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1019 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1020 if (sub_schurs->is_hermitian) { 1021 PetscInt j,k; 1022 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1023 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1024 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 } 1026 for (j=0;j<subset_size;j++) { 1027 for (k=j;k<subset_size;k++) { 1028 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1029 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1030 } 1031 } 1032 } else { 1033 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1034 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 } 1036 } else { 1037 S = Sarray + cumarray; 1038 St = Starray + cumarray; 1039 } 1040 /* see if we can save some work */ 1041 if (sub_schurs->n_subs == 1) { 1042 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1043 } 1044 1045 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1046 B_neigs = 0; 1047 } else { 1048 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1049 PetscBLASInt B_itype = 1; 1050 PetscBLASInt B_IL, B_IU; 1051 PetscReal eps = -1.0; /* dlamch? */ 1052 PetscInt nmin_s; 1053 1054 if (pcbddc->dbg_flag) { 1055 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1056 } 1057 1058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1059 if (thresh > 1.+PETSC_SMALL) { 1060 1061 /* ask for eigenvalues larger than thresh */ 1062 #if defined(PETSC_USE_COMPLEX) 1063 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1064 #else 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1066 #endif 1067 } else { 1068 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1069 B_IL = 1; 1070 #if defined(PETSC_USE_COMPLEX) 1071 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1072 #else 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1074 #endif 1075 } 1076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1077 if (B_ierr) { 1078 if (B_ierr < 0 ) { 1079 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1080 } else if (B_ierr <= B_N) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1082 } else { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1084 } 1085 } 1086 1087 if (B_neigs > nmax) { 1088 if (pcbddc->dbg_flag) { 1089 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1090 } 1091 eigs_start = B_neigs -nmax; 1092 B_neigs = nmax; 1093 } 1094 1095 nmin_s = PetscMin(nmin,B_N); 1096 if (B_neigs < nmin_s) { 1097 PetscBLASInt B_neigs2; 1098 1099 B_IU = B_N - B_neigs; 1100 B_IL = B_N - nmin_s + 1; 1101 if (pcbddc->dbg_flag) { 1102 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1103 } 1104 if (sub_schurs->is_hermitian) { 1105 PetscInt j; 1106 for (j=0;j<subset_size;j++) { 1107 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1108 } 1109 for (j=0;j<subset_size;j++) { 1110 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1114 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1115 } 1116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1117 #if defined(PETSC_USE_COMPLEX) 1118 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1119 #else 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1121 #endif 1122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1123 B_neigs += B_neigs2; 1124 } 1125 if (B_ierr) { 1126 if (B_ierr < 0 ) { 1127 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1128 } else if (B_ierr <= B_N) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1130 } else { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1132 } 1133 } 1134 if (pcbddc->dbg_flag) { 1135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1136 for (j=0;j<B_neigs;j++) { 1137 if (eigs[j] == 0.0) { 1138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1141 } 1142 } 1143 } 1144 } else { 1145 /* TODO */ 1146 } 1147 } 1148 /* change the basis back to the original one */ 1149 if (sub_schurs->change) { 1150 Mat change,phi,phit; 1151 1152 if (pcbddc->dbg_flag > 1) { 1153 PetscInt ii; 1154 for (ii=0;ii<B_neigs;ii++) { 1155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1156 for (j=0;j<B_N;j++) { 1157 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1158 } 1159 } 1160 } 1161 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1163 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1164 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1165 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1166 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1167 } 1168 maxneigs = PetscMax(B_neigs,maxneigs); 1169 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1170 if (B_neigs) { 1171 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1172 1173 if (pcbddc->dbg_flag > 1) { 1174 PetscInt ii; 1175 for (ii=0;ii<B_neigs;ii++) { 1176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1177 for (j=0;j<B_N;j++) { 1178 #if defined(PETSC_USE_COMPLEX) 1179 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1180 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1182 #else 1183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1184 #endif 1185 } 1186 } 1187 } 1188 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1189 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1190 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1191 cum++; 1192 } 1193 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1194 /* shift for next computation */ 1195 cumarray += subset_size*subset_size; 1196 } 1197 if (pcbddc->dbg_flag) { 1198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1199 } 1200 1201 if (mss) { 1202 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1203 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1204 /* destroy matrices (junk) */ 1205 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1206 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1207 } 1208 if (allocated_S_St) { 1209 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1210 } 1211 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1212 #if defined(PETSC_USE_COMPLEX) 1213 ierr = PetscFree(rwork);CHKERRQ(ierr); 1214 #endif 1215 if (pcbddc->dbg_flag) { 1216 PetscInt maxneigs_r; 1217 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "PCBDDCSetUpSolvers" 1225 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1226 { 1227 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1228 PetscScalar *coarse_submat_vals; 1229 PetscErrorCode ierr; 1230 1231 PetscFunctionBegin; 1232 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1233 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1234 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1235 1236 /* Setup local neumann solver ksp_R */ 1237 /* PCBDDCSetUpLocalScatters should be called first! */ 1238 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1239 1240 /* Change global null space passed in by the user if change of basis has been requested */ 1241 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1242 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1243 } 1244 1245 /* 1246 Setup local correction and local part of coarse basis. 1247 Gives back the dense local part of the coarse matrix in column major ordering 1248 */ 1249 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* Compute total number of coarse nodes and setup coarse solver */ 1252 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1253 1254 /* free */ 1255 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCBDDCResetCustomization" 1261 PetscErrorCode PCBDDCResetCustomization(PC pc) 1262 { 1263 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1270 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1271 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1272 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1274 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1275 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1276 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1277 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1278 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "PCBDDCResetTopography" 1284 PetscErrorCode PCBDDCResetTopography(PC pc) 1285 { 1286 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1287 PetscInt i; 1288 PetscErrorCode ierr; 1289 1290 PetscFunctionBegin; 1291 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1292 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1293 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1294 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1295 for (i=0;i<pcbddc->n_local_subs;i++) { 1296 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1299 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "PCBDDCResetSolvers" 1305 PetscErrorCode PCBDDCResetSolvers(PC pc) 1306 { 1307 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1312 if (pcbddc->coarse_phi_B) { 1313 PetscScalar *array; 1314 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1315 ierr = PetscFree(array);CHKERRQ(ierr); 1316 } 1317 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1318 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1319 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1321 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1323 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1324 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1325 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1326 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1327 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1328 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1329 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1330 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1331 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1332 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1333 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1334 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1335 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1336 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1337 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1338 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1339 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1341 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1342 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1343 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1344 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1345 if (pcbddc->benign_zerodiag_subs) { 1346 PetscInt i; 1347 for (i=0;i<pcbddc->benign_n;i++) { 1348 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1349 } 1350 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1351 } 1352 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1358 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1359 { 1360 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1361 PC_IS *pcis = (PC_IS*)pc->data; 1362 VecType impVecType; 1363 PetscInt n_constraints,n_R,old_size; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 if (!pcbddc->ConstraintMatrix) { 1368 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1369 } 1370 /* get sizes */ 1371 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1372 n_R = pcis->n - pcbddc->n_vertices; 1373 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1374 /* local work vectors (try to avoid unneeded work)*/ 1375 /* R nodes */ 1376 old_size = -1; 1377 if (pcbddc->vec1_R) { 1378 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1379 } 1380 if (n_R != old_size) { 1381 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1382 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1383 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1384 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1385 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1386 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1387 } 1388 /* local primal dofs */ 1389 old_size = -1; 1390 if (pcbddc->vec1_P) { 1391 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1392 } 1393 if (pcbddc->local_primal_size != old_size) { 1394 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1395 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1396 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1397 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1398 } 1399 /* local explicit constraints */ 1400 old_size = -1; 1401 if (pcbddc->vec1_C) { 1402 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1403 } 1404 if (n_constraints && n_constraints != old_size) { 1405 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1406 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1407 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1408 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "PCBDDCSetUpCorrection" 1415 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1416 { 1417 PetscErrorCode ierr; 1418 /* pointers to pcis and pcbddc */ 1419 PC_IS* pcis = (PC_IS*)pc->data; 1420 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1421 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1422 /* submatrices of local problem */ 1423 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1424 /* submatrices of local coarse problem */ 1425 Mat S_VV,S_CV,S_VC,S_CC; 1426 /* working matrices */ 1427 Mat C_CR; 1428 /* additional working stuff */ 1429 PC pc_R; 1430 Mat F; 1431 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1432 1433 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1434 PetscScalar *work; 1435 PetscInt *idx_V_B; 1436 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1437 PetscInt i,n_R,n_D,n_B; 1438 1439 /* some shortcuts to scalars */ 1440 PetscScalar one=1.0,m_one=-1.0; 1441 1442 PetscFunctionBegin; 1443 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1445 } 1446 1447 /* Set Non-overlapping dimensions */ 1448 n_vertices = pcbddc->n_vertices; 1449 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1450 n_B = pcis->n_B; 1451 n_D = pcis->n - n_B; 1452 n_R = pcis->n - n_vertices; 1453 1454 /* vertices in boundary numbering */ 1455 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1456 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1457 if (i != n_vertices) { 1458 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1459 } 1460 1461 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1462 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1463 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1464 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1465 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1466 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1467 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1468 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1469 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1470 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1471 1472 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1473 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1476 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1477 lda_rhs = n_R; 1478 need_benign_correction = PETSC_FALSE; 1479 if (isLU || isILU || isCHOL) { 1480 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1481 } else if (sub_schurs->reuse_solver) { 1482 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1483 MatFactorType type; 1484 1485 F = reuse_solver->F; 1486 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1487 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1488 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1489 need_benign_correction = !!reuse_solver->benign_n; 1490 } else { 1491 F = NULL; 1492 } 1493 1494 /* allocate workspace */ 1495 n = 0; 1496 if (n_constraints) { 1497 n += lda_rhs*n_constraints; 1498 } 1499 if (n_vertices) { 1500 n = PetscMax(2*lda_rhs*n_vertices,n); 1501 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1502 } 1503 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1504 1505 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1506 if (n_constraints) { 1507 Mat M1,M2,M3,C_B; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1540 1541 /* rhs is already zero on interior dofs, no need to change the rhs */ 1542 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1543 } 1544 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1545 if (need_benign_correction) { 1546 PetscScalar *marr; 1547 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1548 1549 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1550 for (i=0;i<n_constraints;i++) { 1551 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1552 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1553 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1554 } 1555 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 } 1557 ierr = MatDestroy(&B);CHKERRQ(ierr); 1558 } else { 1559 PetscScalar *marr; 1560 1561 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 for (i=0;i<n_constraints;i++) { 1563 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1564 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1565 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1566 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1567 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1568 } 1569 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1570 } 1571 if (!pcbddc->switch_static) { 1572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1573 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1574 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1575 for (i=0;i<n_constraints;i++) { 1576 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1577 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1578 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1581 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1582 } 1583 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1584 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1585 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1586 } else { 1587 if (lda_rhs != n_R) { 1588 IS dummy; 1589 1590 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1591 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1592 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1593 } else { 1594 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1595 pcbddc->local_auxmat2 = local_auxmat2_R; 1596 } 1597 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1598 } 1599 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1600 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1601 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1602 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1603 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1604 if (isCHOL) { 1605 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1606 } else { 1607 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1608 } 1609 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1610 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1611 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1612 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1613 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1614 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1615 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1616 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1617 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1618 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1619 } 1620 1621 /* Get submatrices from subdomain matrix */ 1622 if (n_vertices) { 1623 IS is_aux; 1624 1625 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1626 IS tis; 1627 1628 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1629 ierr = ISSort(tis);CHKERRQ(ierr); 1630 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1631 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1632 } else { 1633 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1634 } 1635 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1636 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1637 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1638 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1639 } 1640 1641 /* Matrix of coarse basis functions (local) */ 1642 if (pcbddc->coarse_phi_B) { 1643 PetscInt on_B,on_primal,on_D=n_D; 1644 if (pcbddc->coarse_phi_D) { 1645 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1646 } 1647 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1648 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1649 PetscScalar *marray; 1650 1651 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1652 ierr = PetscFree(marray);CHKERRQ(ierr); 1653 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1654 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1655 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1656 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1657 } 1658 } 1659 1660 if (!pcbddc->coarse_phi_B) { 1661 PetscScalar *marray; 1662 1663 n = n_B*pcbddc->local_primal_size; 1664 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1665 n += n_D*pcbddc->local_primal_size; 1666 } 1667 if (!pcbddc->symmetric_primal) { 1668 n *= 2; 1669 } 1670 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1672 n = n_B*pcbddc->local_primal_size; 1673 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1674 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1675 n += n_D*pcbddc->local_primal_size; 1676 } 1677 if (!pcbddc->symmetric_primal) { 1678 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1679 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1680 n = n_B*pcbddc->local_primal_size; 1681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1682 } 1683 } else { 1684 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1685 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1686 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1687 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1688 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1689 } 1690 } 1691 } 1692 1693 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1694 p0_lidx_I = NULL; 1695 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1696 const PetscInt *idxs; 1697 1698 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1699 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1700 for (i=0;i<pcbddc->benign_n;i++) { 1701 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1702 } 1703 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1704 } 1705 1706 /* vertices */ 1707 if (n_vertices) { 1708 1709 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1710 1711 if (n_R) { 1712 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1713 PetscBLASInt B_N,B_one = 1; 1714 PetscScalar *x,*y; 1715 PetscBool isseqaij; 1716 1717 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1718 if (need_benign_correction) { 1719 ISLocalToGlobalMapping RtoN; 1720 IS is_p0; 1721 PetscInt *idxs_p0,n; 1722 1723 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1724 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1725 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1726 if (n != pcbddc->benign_n) { 1727 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1728 } 1729 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1730 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1731 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1732 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1733 } 1734 1735 if (lda_rhs == n_R) { 1736 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1737 } else { 1738 PetscScalar *av,*array; 1739 const PetscInt *xadj,*adjncy; 1740 PetscInt n; 1741 PetscBool flg_row; 1742 1743 array = work+lda_rhs*n_vertices; 1744 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1745 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1746 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1747 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1748 for (i=0;i<n;i++) { 1749 PetscInt j; 1750 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1751 } 1752 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1753 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1755 } 1756 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1757 if (need_benign_correction) { 1758 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1759 PetscScalar *marr; 1760 1761 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1762 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1763 1764 | 0 0 0 | (V) 1765 L = | 0 0 -1 | (P-p0) 1766 | 0 0 -1 | (p0) 1767 1768 */ 1769 for (i=0;i<reuse_solver->benign_n;i++) { 1770 const PetscScalar *vals; 1771 const PetscInt *idxs,*idxs_zero; 1772 PetscInt n,j,nz; 1773 1774 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1775 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1776 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1777 for (j=0;j<n;j++) { 1778 PetscScalar val = vals[j]; 1779 PetscInt k,col = idxs[j]; 1780 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1781 } 1782 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1783 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1784 } 1785 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1786 } 1787 if (F) { 1788 /* need to correct the rhs */ 1789 if (need_benign_correction) { 1790 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1791 PetscScalar *marr; 1792 1793 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1794 for (i=0;i<n_vertices;i++) { 1795 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1796 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1797 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1798 } 1799 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1800 } 1801 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1802 /* need to correct the solution */ 1803 if (need_benign_correction) { 1804 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1805 PetscScalar *marr; 1806 1807 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1808 for (i=0;i<n_vertices;i++) { 1809 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1810 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1811 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1812 } 1813 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1814 } 1815 } else { 1816 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1817 for (i=0;i<n_vertices;i++) { 1818 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1819 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1820 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1822 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1823 } 1824 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1825 } 1826 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1827 /* S_VV and S_CV */ 1828 if (n_constraints) { 1829 Mat B; 1830 1831 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1832 for (i=0;i<n_vertices;i++) { 1833 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1834 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1835 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1836 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1837 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1838 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1839 } 1840 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1841 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1842 ierr = MatDestroy(&B);CHKERRQ(ierr); 1843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1844 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1845 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1846 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1848 ierr = MatDestroy(&B);CHKERRQ(ierr); 1849 } 1850 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1851 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1852 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1853 } 1854 if (lda_rhs != n_R) { 1855 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1856 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1857 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1858 } 1859 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1860 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1861 if (need_benign_correction) { 1862 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1863 PetscScalar *marr,*sums; 1864 1865 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1866 ierr = MatDenseGetArray(S_VVt,&marr); 1867 for (i=0;i<reuse_solver->benign_n;i++) { 1868 const PetscScalar *vals; 1869 const PetscInt *idxs,*idxs_zero; 1870 PetscInt n,j,nz; 1871 1872 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1873 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1874 for (j=0;j<n_vertices;j++) { 1875 PetscInt k; 1876 sums[j] = 0.; 1877 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1878 } 1879 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1880 for (j=0;j<n;j++) { 1881 PetscScalar val = vals[j]; 1882 PetscInt k; 1883 for (k=0;k<n_vertices;k++) { 1884 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1885 } 1886 } 1887 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1888 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(sums);CHKERRQ(ierr); 1891 ierr = MatDenseRestoreArray(S_VVt,&marr); 1892 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1893 } 1894 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1896 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1897 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1898 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1899 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1900 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1901 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1902 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1903 } else { 1904 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1905 } 1906 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1907 1908 /* coarse basis functions */ 1909 for (i=0;i<n_vertices;i++) { 1910 PetscScalar *y; 1911 1912 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1913 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1914 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1915 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1916 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1917 y[n_B*i+idx_V_B[i]] = 1.0; 1918 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1919 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1920 1921 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1922 PetscInt j; 1923 1924 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1925 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1926 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1927 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1928 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1929 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1930 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1931 } 1932 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1933 } 1934 /* if n_R == 0 the object is not destroyed */ 1935 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1936 } 1937 1938 if (n_constraints) { 1939 Mat B; 1940 1941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1942 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1943 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1944 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1945 if (n_vertices) { 1946 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1947 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1948 } else { 1949 Mat S_VCt; 1950 1951 if (lda_rhs != n_R) { 1952 ierr = MatDestroy(&B);CHKERRQ(ierr); 1953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1954 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1955 } 1956 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1957 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1958 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1959 } 1960 } 1961 ierr = MatDestroy(&B);CHKERRQ(ierr); 1962 /* coarse basis functions */ 1963 for (i=0;i<n_constraints;i++) { 1964 PetscScalar *y; 1965 1966 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1967 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1968 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1972 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1973 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1974 PetscInt j; 1975 1976 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1977 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1978 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1979 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1980 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1981 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1982 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1983 } 1984 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1985 } 1986 } 1987 if (n_constraints) { 1988 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1989 } 1990 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1991 1992 /* coarse matrix entries relative to B_0 */ 1993 if (pcbddc->benign_n) { 1994 Mat B0_B,B0_BPHI; 1995 IS is_dummy; 1996 PetscScalar *data; 1997 PetscInt j; 1998 1999 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2000 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2001 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2002 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2003 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2004 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2005 for (j=0;j<pcbddc->benign_n;j++) { 2006 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2007 for (i=0;i<pcbddc->local_primal_size;i++) { 2008 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2009 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2010 } 2011 } 2012 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2013 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2014 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2015 } 2016 2017 /* compute other basis functions for non-symmetric problems */ 2018 if (!pcbddc->symmetric_primal) { 2019 Mat B_V=NULL,B_C=NULL; 2020 PetscScalar *marray; 2021 2022 if (n_constraints) { 2023 Mat S_CCT,C_CRT; 2024 2025 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2026 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2027 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2028 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2029 if (n_vertices) { 2030 Mat S_VCT; 2031 2032 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2033 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2034 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2035 } 2036 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2037 } 2038 if (n_vertices && n_R) { 2039 PetscScalar *av,*marray; 2040 const PetscInt *xadj,*adjncy; 2041 PetscInt n; 2042 PetscBool flg_row; 2043 2044 /* B_V = B_V - A_VR^T */ 2045 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2046 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2047 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2048 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2049 for (i=0;i<n;i++) { 2050 PetscInt j; 2051 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2052 } 2053 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2054 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2055 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2056 } 2057 2058 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2059 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2060 for (i=0;i<n_vertices;i++) { 2061 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2062 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2063 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2065 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2066 } 2067 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2068 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2069 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2070 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2071 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2072 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2074 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2075 } 2076 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2077 /* coarse basis functions */ 2078 for (i=0;i<pcbddc->local_primal_size;i++) { 2079 PetscScalar *y; 2080 2081 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2082 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2083 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2084 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2085 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 if (i<n_vertices) { 2087 y[n_B*i+idx_V_B[i]] = 1.0; 2088 } 2089 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2091 2092 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2093 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2094 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2095 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2098 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2099 } 2100 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2101 } 2102 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2103 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2104 } 2105 /* free memory */ 2106 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2107 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2108 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2109 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2110 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2111 ierr = PetscFree(work);CHKERRQ(ierr); 2112 if (n_vertices) { 2113 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2114 } 2115 if (n_constraints) { 2116 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2117 } 2118 /* Checking coarse_sub_mat and coarse basis functios */ 2119 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2120 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2121 if (pcbddc->dbg_flag) { 2122 Mat coarse_sub_mat; 2123 Mat AUXMAT,TM1,TM2,TM3,TM4; 2124 Mat coarse_phi_D,coarse_phi_B; 2125 Mat coarse_psi_D,coarse_psi_B; 2126 Mat A_II,A_BB,A_IB,A_BI; 2127 Mat C_B,CPHI; 2128 IS is_dummy; 2129 Vec mones; 2130 MatType checkmattype=MATSEQAIJ; 2131 PetscReal real_value; 2132 2133 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2134 Mat A; 2135 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2136 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2137 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2138 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2139 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2140 ierr = MatDestroy(&A);CHKERRQ(ierr); 2141 } else { 2142 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2143 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2144 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2145 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2146 } 2147 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2148 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2149 if (!pcbddc->symmetric_primal) { 2150 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2151 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2152 } 2153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2154 2155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2157 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2158 if (!pcbddc->symmetric_primal) { 2159 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2160 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2161 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2162 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2163 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2164 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2165 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2166 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2167 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2168 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2169 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2171 } else { 2172 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2173 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2174 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2175 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2176 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2178 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2180 } 2181 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2182 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2183 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2184 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2185 if (pcbddc->benign_n) { 2186 Mat B0_B,B0_BPHI; 2187 PetscScalar *data,*data2; 2188 PetscInt j; 2189 2190 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2191 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2192 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2193 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2194 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2195 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2196 for (j=0;j<pcbddc->benign_n;j++) { 2197 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2198 for (i=0;i<pcbddc->local_primal_size;i++) { 2199 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2200 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2201 } 2202 } 2203 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2204 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2205 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2206 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2207 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2208 } 2209 #if 0 2210 { 2211 PetscViewer viewer; 2212 char filename[256]; 2213 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2214 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2215 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2216 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2217 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2218 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2219 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2220 if (save_change) { 2221 Mat phi_B; 2222 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2223 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2224 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2225 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2226 } else { 2227 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2228 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2229 } 2230 if (pcbddc->coarse_phi_D) { 2231 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2232 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2233 } 2234 if (pcbddc->coarse_psi_B) { 2235 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2236 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2237 } 2238 if (pcbddc->coarse_psi_D) { 2239 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2240 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2241 } 2242 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2243 } 2244 #endif 2245 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2246 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2248 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2249 2250 /* check constraints */ 2251 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2252 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2254 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2255 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2256 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2257 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2258 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2260 if (!pcbddc->symmetric_primal) { 2261 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2262 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2263 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2264 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2266 } 2267 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2268 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2269 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2270 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2271 } 2272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2273 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2274 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2275 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2276 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2277 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2278 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2279 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2280 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2281 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2282 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2283 if (!pcbddc->symmetric_primal) { 2284 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2285 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2286 } 2287 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2288 } 2289 /* get back data */ 2290 *coarse_submat_vals_n = coarse_submat_vals; 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2296 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2297 { 2298 Mat *work_mat; 2299 IS isrow_s,iscol_s; 2300 PetscBool rsorted,csorted; 2301 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2306 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2307 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2308 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2309 2310 if (!rsorted) { 2311 const PetscInt *idxs; 2312 PetscInt *idxs_sorted,i; 2313 2314 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2315 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2316 for (i=0;i<rsize;i++) { 2317 idxs_perm_r[i] = i; 2318 } 2319 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2320 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2321 for (i=0;i<rsize;i++) { 2322 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2323 } 2324 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2325 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2326 } else { 2327 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2328 isrow_s = isrow; 2329 } 2330 2331 if (!csorted) { 2332 if (isrow == iscol) { 2333 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2334 iscol_s = isrow_s; 2335 } else { 2336 const PetscInt *idxs; 2337 PetscInt *idxs_sorted,i; 2338 2339 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2340 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2341 for (i=0;i<csize;i++) { 2342 idxs_perm_c[i] = i; 2343 } 2344 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2345 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2346 for (i=0;i<csize;i++) { 2347 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2348 } 2349 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2350 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2351 } 2352 } else { 2353 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2354 iscol_s = iscol; 2355 } 2356 2357 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2358 2359 if (!rsorted || !csorted) { 2360 Mat new_mat; 2361 IS is_perm_r,is_perm_c; 2362 2363 if (!rsorted) { 2364 PetscInt *idxs_r,i; 2365 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2366 for (i=0;i<rsize;i++) { 2367 idxs_r[idxs_perm_r[i]] = i; 2368 } 2369 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2370 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2371 } else { 2372 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2373 } 2374 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2375 2376 if (!csorted) { 2377 if (isrow_s == iscol_s) { 2378 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2379 is_perm_c = is_perm_r; 2380 } else { 2381 PetscInt *idxs_c,i; 2382 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2383 for (i=0;i<csize;i++) { 2384 idxs_c[idxs_perm_c[i]] = i; 2385 } 2386 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2387 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2388 } 2389 } else { 2390 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2391 } 2392 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2393 2394 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2395 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2396 work_mat[0] = new_mat; 2397 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2398 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2399 } 2400 2401 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2402 *B = work_mat[0]; 2403 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2404 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2405 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2411 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2412 { 2413 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2414 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2415 Mat new_mat; 2416 IS is_local,is_global; 2417 PetscInt local_size; 2418 PetscBool isseqaij; 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2423 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2424 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2425 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2426 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2427 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2428 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2429 2430 /* check */ 2431 if (pcbddc->dbg_flag) { 2432 Vec x,x_change; 2433 PetscReal error; 2434 2435 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2436 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2437 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2438 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2439 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2440 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2441 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2442 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2443 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2444 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2445 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2446 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2447 ierr = VecDestroy(&x);CHKERRQ(ierr); 2448 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2449 } 2450 2451 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2452 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2453 if (isseqaij) { 2454 Mat M; 2455 2456 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2457 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2458 ierr = MatDestroy(&M);CHKERRQ(ierr); 2459 } else { 2460 Mat work_mat,M; 2461 2462 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2463 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2464 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2465 ierr = MatDestroy(&M);CHKERRQ(ierr); 2466 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2467 } 2468 if (matis->A->symmetric_set) { 2469 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2470 #if !defined(PETSC_USE_COMPLEX) 2471 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2472 #endif 2473 } 2474 /* 2475 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2476 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2477 */ 2478 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2484 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2485 { 2486 PC_IS* pcis = (PC_IS*)(pc->data); 2487 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2488 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2489 PetscInt *idx_R_local=NULL; 2490 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2491 PetscInt vbs,bs; 2492 PetscBT bitmask=NULL; 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 /* 2497 No need to setup local scatters if 2498 - primal space is unchanged 2499 AND 2500 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2501 AND 2502 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2503 */ 2504 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2505 PetscFunctionReturn(0); 2506 } 2507 /* destroy old objects */ 2508 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2509 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2510 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2511 /* Set Non-overlapping dimensions */ 2512 n_B = pcis->n_B; 2513 n_D = pcis->n - n_B; 2514 n_vertices = pcbddc->n_vertices; 2515 2516 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2517 2518 /* create auxiliary bitmask and allocate workspace */ 2519 if (!sub_schurs->reuse_solver) { 2520 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2521 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2522 for (i=0;i<n_vertices;i++) { 2523 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2524 } 2525 2526 for (i=0, n_R=0; i<pcis->n; i++) { 2527 if (!PetscBTLookup(bitmask,i)) { 2528 idx_R_local[n_R++] = i; 2529 } 2530 } 2531 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2532 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2533 2534 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2535 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2536 } 2537 2538 /* Block code */ 2539 vbs = 1; 2540 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2541 if (bs>1 && !(n_vertices%bs)) { 2542 PetscBool is_blocked = PETSC_TRUE; 2543 PetscInt *vary; 2544 if (!sub_schurs->reuse_solver) { 2545 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2546 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2547 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2548 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2549 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2550 for (i=0; i<pcis->n/bs; i++) { 2551 if (vary[i]!=0 && vary[i]!=bs) { 2552 is_blocked = PETSC_FALSE; 2553 break; 2554 } 2555 } 2556 ierr = PetscFree(vary);CHKERRQ(ierr); 2557 } else { 2558 /* Verify directly the R set */ 2559 for (i=0; i<n_R/bs; i++) { 2560 PetscInt j,node=idx_R_local[bs*i]; 2561 for (j=1; j<bs; j++) { 2562 if (node != idx_R_local[bs*i+j]-j) { 2563 is_blocked = PETSC_FALSE; 2564 break; 2565 } 2566 } 2567 } 2568 } 2569 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2570 vbs = bs; 2571 for (i=0;i<n_R/vbs;i++) { 2572 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2573 } 2574 } 2575 } 2576 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2577 if (sub_schurs->reuse_solver) { 2578 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2579 2580 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2581 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2582 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2583 reuse_solver->is_R = pcbddc->is_R_local; 2584 } else { 2585 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2586 } 2587 2588 /* print some info if requested */ 2589 if (pcbddc->dbg_flag) { 2590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2592 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2595 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2597 } 2598 2599 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2600 if (!sub_schurs->reuse_solver) { 2601 IS is_aux1,is_aux2; 2602 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2603 2604 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2605 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2606 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2607 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2608 for (i=0; i<n_D; i++) { 2609 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2610 } 2611 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2612 for (i=0, j=0; i<n_R; i++) { 2613 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2614 aux_array1[j++] = i; 2615 } 2616 } 2617 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2618 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2619 for (i=0, j=0; i<n_B; i++) { 2620 if (!PetscBTLookup(bitmask,is_indices[i])) { 2621 aux_array2[j++] = i; 2622 } 2623 } 2624 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2625 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2626 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2627 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2628 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2629 2630 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2631 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2632 for (i=0, j=0; i<n_R; i++) { 2633 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2634 aux_array1[j++] = i; 2635 } 2636 } 2637 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2638 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2639 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2640 } 2641 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2642 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2643 } else { 2644 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2645 IS tis; 2646 PetscInt schur_size; 2647 2648 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2649 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2650 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2652 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2653 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2654 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2655 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2656 } 2657 } 2658 PetscFunctionReturn(0); 2659 } 2660 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2664 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2665 { 2666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2667 PC_IS *pcis = (PC_IS*)pc->data; 2668 PC pc_temp; 2669 Mat A_RR; 2670 MatReuse reuse; 2671 PetscScalar m_one = -1.0; 2672 PetscReal value; 2673 PetscInt n_D,n_R; 2674 PetscBool use_exact,use_exact_reduced,issbaij; 2675 PetscErrorCode ierr; 2676 /* prefixes stuff */ 2677 char dir_prefix[256],neu_prefix[256],str_level[16]; 2678 size_t len; 2679 2680 PetscFunctionBegin; 2681 2682 /* compute prefixes */ 2683 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2684 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2685 if (!pcbddc->current_level) { 2686 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2687 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2688 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2689 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2690 } else { 2691 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2692 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2693 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2694 len -= 15; /* remove "pc_bddc_coarse_" */ 2695 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2696 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2697 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2698 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2699 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2700 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2701 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2702 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2703 } 2704 2705 /* DIRICHLET PROBLEM */ 2706 if (dirichlet) { 2707 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2708 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2709 if (!sub_schurs->reuse_solver) { 2710 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2711 } 2712 if (pcbddc->dbg_flag) { 2713 Mat A_IIn; 2714 2715 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2716 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2717 pcis->A_II = A_IIn; 2718 } 2719 } 2720 if (pcbddc->local_mat->symmetric_set) { 2721 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2722 } 2723 /* Matrix for Dirichlet problem is pcis->A_II */ 2724 n_D = pcis->n - pcis->n_B; 2725 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2726 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2727 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2728 /* default */ 2729 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2730 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2731 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2732 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2733 if (issbaij) { 2734 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2735 } else { 2736 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2737 } 2738 /* Allow user's customization */ 2739 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2740 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2741 } 2742 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2743 if (sub_schurs->reuse_solver) { 2744 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2745 2746 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2747 } 2748 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2749 if (!n_D) { 2750 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2751 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2752 } 2753 /* Set Up KSP for Dirichlet problem of BDDC */ 2754 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2755 /* set ksp_D into pcis data */ 2756 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2757 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2758 pcis->ksp_D = pcbddc->ksp_D; 2759 } 2760 2761 /* NEUMANN PROBLEM */ 2762 A_RR = 0; 2763 if (neumann) { 2764 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2765 PetscInt ibs,mbs; 2766 PetscBool issbaij; 2767 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2768 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2769 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2770 if (pcbddc->ksp_R) { /* already created ksp */ 2771 PetscInt nn_R; 2772 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2773 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2774 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2775 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2776 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2777 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2778 reuse = MAT_INITIAL_MATRIX; 2779 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2780 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2781 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2782 reuse = MAT_INITIAL_MATRIX; 2783 } else { /* safe to reuse the matrix */ 2784 reuse = MAT_REUSE_MATRIX; 2785 } 2786 } 2787 /* last check */ 2788 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2789 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2790 reuse = MAT_INITIAL_MATRIX; 2791 } 2792 } else { /* first time, so we need to create the matrix */ 2793 reuse = MAT_INITIAL_MATRIX; 2794 } 2795 /* extract A_RR */ 2796 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2797 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2798 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2799 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2800 if (matis->A == pcbddc->local_mat) { 2801 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2802 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2803 } else { 2804 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2805 } 2806 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2807 if (matis->A == pcbddc->local_mat) { 2808 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2809 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2810 } else { 2811 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2812 } 2813 } 2814 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2815 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2816 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2817 } else { 2818 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2819 } 2820 if (pcbddc->local_mat->symmetric_set) { 2821 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2822 } 2823 if (!pcbddc->ksp_R) { /* create object if not present */ 2824 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2825 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2826 /* default */ 2827 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2828 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2829 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2830 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2831 if (issbaij) { 2832 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2833 } else { 2834 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2835 } 2836 /* Allow user's customization */ 2837 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2838 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2839 } 2840 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2841 if (!n_R) { 2842 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2843 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2844 } 2845 /* Reuse solver if it is present */ 2846 if (sub_schurs->reuse_solver) { 2847 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2848 2849 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2850 } 2851 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2852 /* Set Up KSP for Neumann problem of BDDC */ 2853 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2854 } 2855 /* free Neumann problem's matrix */ 2856 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2857 2858 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2859 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2860 if (pcbddc->dbg_flag) { 2861 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2862 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2863 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2864 } 2865 if (dirichlet) { /* Dirichlet */ 2866 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2867 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2868 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2869 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2870 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2871 /* need to be adapted? */ 2872 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2873 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2875 /* print info */ 2876 if (pcbddc->dbg_flag) { 2877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2878 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2879 } 2880 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2881 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2882 } 2883 } 2884 if (neumann) { /* Neumann */ 2885 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2886 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2887 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2888 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2889 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2890 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2891 /* need to be adapted? */ 2892 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2893 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2894 /* print info */ 2895 if (pcbddc->dbg_flag) { 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 } 2899 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2900 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2901 } 2902 } 2903 } 2904 PetscFunctionReturn(0); 2905 } 2906 2907 #undef __FUNCT__ 2908 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2909 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2910 { 2911 PetscErrorCode ierr; 2912 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2913 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2914 2915 PetscFunctionBegin; 2916 if (!sub_schurs->reuse_solver) { 2917 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2918 } 2919 if (!pcbddc->switch_static) { 2920 if (applytranspose && pcbddc->local_auxmat1) { 2921 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2922 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2923 } 2924 if (!sub_schurs->reuse_solver) { 2925 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2926 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2927 } else { 2928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2929 2930 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2931 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2932 } 2933 } else { 2934 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2935 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2936 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2937 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2938 if (applytranspose && pcbddc->local_auxmat1) { 2939 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2940 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2941 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2942 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2943 } 2944 } 2945 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 2946 if (applytranspose) { 2947 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2948 } else { 2949 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2950 } 2951 } else { 2952 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2953 2954 if (applytranspose) { 2955 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2956 } else { 2957 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2958 } 2959 } 2960 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2961 if (!pcbddc->switch_static) { 2962 if (!sub_schurs->reuse_solver) { 2963 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2964 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2965 } else { 2966 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2967 2968 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2969 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2970 } 2971 if (!applytranspose && pcbddc->local_auxmat1) { 2972 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2973 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2974 } 2975 } else { 2976 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2977 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2978 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2979 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2980 if (!applytranspose && pcbddc->local_auxmat1) { 2981 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2982 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2983 } 2984 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2985 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2988 } 2989 PetscFunctionReturn(0); 2990 } 2991 2992 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2995 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2996 { 2997 PetscErrorCode ierr; 2998 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2999 PC_IS* pcis = (PC_IS*) (pc->data); 3000 const PetscScalar zero = 0.0; 3001 3002 PetscFunctionBegin; 3003 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3004 if (!pcbddc->benign_apply_coarse_only) { 3005 if (applytranspose) { 3006 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3007 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3008 } else { 3009 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3010 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3011 } 3012 } else { 3013 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3014 } 3015 3016 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3017 if (pcbddc->benign_n) { 3018 PetscScalar *array; 3019 PetscInt j; 3020 3021 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3022 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3023 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3024 } 3025 3026 /* start communications from local primal nodes to rhs of coarse solver */ 3027 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3028 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3029 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3030 3031 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3032 /* TODO remove null space when doing multilevel */ 3033 if (pcbddc->coarse_ksp) { 3034 Mat coarse_mat; 3035 Vec rhs,sol; 3036 MatNullSpace nullsp; 3037 3038 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3039 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3040 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3041 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3042 if (nullsp) { 3043 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3044 } 3045 if (applytranspose) { 3046 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3047 } else { 3048 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3049 } 3050 if (nullsp) { 3051 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3052 } 3053 } 3054 3055 /* Local solution on R nodes */ 3056 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3057 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3058 } 3059 /* communications from coarse sol to local primal nodes */ 3060 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3061 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3062 3063 /* Sum contributions from the two levels */ 3064 if (!pcbddc->benign_apply_coarse_only) { 3065 if (applytranspose) { 3066 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3067 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3068 } else { 3069 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3070 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3071 } 3072 /* store p0 */ 3073 if (pcbddc->benign_n) { 3074 PetscScalar *array; 3075 PetscInt j; 3076 3077 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3078 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3079 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3080 } 3081 } else { /* expand the coarse solution */ 3082 if (applytranspose) { 3083 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3084 } else { 3085 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3086 } 3087 } 3088 PetscFunctionReturn(0); 3089 } 3090 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3093 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3094 { 3095 PetscErrorCode ierr; 3096 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3097 PetscScalar *array; 3098 Vec from,to; 3099 3100 PetscFunctionBegin; 3101 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3102 from = pcbddc->coarse_vec; 3103 to = pcbddc->vec1_P; 3104 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3105 Vec tvec; 3106 3107 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3108 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3109 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3110 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3111 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3112 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3113 } 3114 } else { /* from local to global -> put data in coarse right hand side */ 3115 from = pcbddc->vec1_P; 3116 to = pcbddc->coarse_vec; 3117 } 3118 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3119 PetscFunctionReturn(0); 3120 } 3121 3122 #undef __FUNCT__ 3123 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3124 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3125 { 3126 PetscErrorCode ierr; 3127 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3128 PetscScalar *array; 3129 Vec from,to; 3130 3131 PetscFunctionBegin; 3132 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3133 from = pcbddc->coarse_vec; 3134 to = pcbddc->vec1_P; 3135 } else { /* from local to global -> put data in coarse right hand side */ 3136 from = pcbddc->vec1_P; 3137 to = pcbddc->coarse_vec; 3138 } 3139 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3140 if (smode == SCATTER_FORWARD) { 3141 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3142 Vec tvec; 3143 3144 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3145 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3146 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3147 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3148 } 3149 } else { 3150 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3151 ierr = VecResetArray(from);CHKERRQ(ierr); 3152 } 3153 } 3154 PetscFunctionReturn(0); 3155 } 3156 3157 /* uncomment for testing purposes */ 3158 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3161 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3162 { 3163 PetscErrorCode ierr; 3164 PC_IS* pcis = (PC_IS*)(pc->data); 3165 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3166 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3167 /* one and zero */ 3168 PetscScalar one=1.0,zero=0.0; 3169 /* space to store constraints and their local indices */ 3170 PetscScalar *constraints_data; 3171 PetscInt *constraints_idxs,*constraints_idxs_B; 3172 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3173 PetscInt *constraints_n; 3174 /* iterators */ 3175 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3176 /* BLAS integers */ 3177 PetscBLASInt lwork,lierr; 3178 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3179 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3180 /* reuse */ 3181 PetscInt olocal_primal_size,olocal_primal_size_cc; 3182 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3183 /* change of basis */ 3184 PetscBool qr_needed; 3185 PetscBT change_basis,qr_needed_idx; 3186 /* auxiliary stuff */ 3187 PetscInt *nnz,*is_indices; 3188 PetscInt ncc; 3189 /* some quantities */ 3190 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3191 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3192 3193 PetscFunctionBegin; 3194 /* Destroy Mat objects computed previously */ 3195 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3196 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3197 /* save info on constraints from previous setup (if any) */ 3198 olocal_primal_size = pcbddc->local_primal_size; 3199 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3200 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3201 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3202 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3203 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3204 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3205 3206 /* print some info */ 3207 if (pcbddc->dbg_flag) { 3208 IS vertices; 3209 PetscInt nv,nedges,nfaces; 3210 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3211 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3212 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3219 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3220 } 3221 3222 if (!pcbddc->adaptive_selection) { 3223 IS ISForVertices,*ISForFaces,*ISForEdges; 3224 MatNullSpace nearnullsp; 3225 const Vec *nearnullvecs; 3226 Vec *localnearnullsp; 3227 PetscScalar *array; 3228 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3229 PetscBool nnsp_has_cnst; 3230 /* LAPACK working arrays for SVD or POD */ 3231 PetscBool skip_lapack,boolforchange; 3232 PetscScalar *work; 3233 PetscReal *singular_vals; 3234 #if defined(PETSC_USE_COMPLEX) 3235 PetscReal *rwork; 3236 #endif 3237 #if defined(PETSC_MISSING_LAPACK_GESVD) 3238 PetscScalar *temp_basis,*correlation_mat; 3239 #else 3240 PetscBLASInt dummy_int=1; 3241 PetscScalar dummy_scalar=1.; 3242 #endif 3243 3244 /* Get index sets for faces, edges and vertices from graph */ 3245 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3246 /* free unneeded index sets */ 3247 if (!pcbddc->use_vertices) { 3248 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3249 } 3250 if (!pcbddc->use_edges) { 3251 for (i=0;i<n_ISForEdges;i++) { 3252 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3253 } 3254 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3255 n_ISForEdges = 0; 3256 } 3257 if (!pcbddc->use_faces) { 3258 for (i=0;i<n_ISForFaces;i++) { 3259 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3260 } 3261 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3262 n_ISForFaces = 0; 3263 } 3264 3265 #if defined(PETSC_USE_DEBUG) 3266 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3267 Also use_change_of_basis should be consistent among processors */ 3268 if (pcbddc->NullSpace) { 3269 PetscBool tbool[2],gbool[2]; 3270 3271 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3272 pcbddc->use_change_of_basis = PETSC_TRUE; 3273 if (!ISForEdges) { 3274 pcbddc->use_change_on_faces = PETSC_TRUE; 3275 } 3276 } 3277 tbool[0] = pcbddc->use_change_of_basis; 3278 tbool[1] = pcbddc->use_change_on_faces; 3279 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3280 pcbddc->use_change_of_basis = gbool[0]; 3281 pcbddc->use_change_on_faces = gbool[1]; 3282 } 3283 #endif 3284 3285 /* check if near null space is attached to global mat */ 3286 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3287 if (nearnullsp) { 3288 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3289 /* remove any stored info */ 3290 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3291 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3292 /* store information for BDDC solver reuse */ 3293 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3294 pcbddc->onearnullspace = nearnullsp; 3295 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3296 for (i=0;i<nnsp_size;i++) { 3297 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3298 } 3299 } else { /* if near null space is not provided BDDC uses constants by default */ 3300 nnsp_size = 0; 3301 nnsp_has_cnst = PETSC_TRUE; 3302 } 3303 /* get max number of constraints on a single cc */ 3304 max_constraints = nnsp_size; 3305 if (nnsp_has_cnst) max_constraints++; 3306 3307 /* 3308 Evaluate maximum storage size needed by the procedure 3309 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3310 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3311 There can be multiple constraints per connected component 3312 */ 3313 n_vertices = 0; 3314 if (ISForVertices) { 3315 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3316 } 3317 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3318 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3319 3320 total_counts = n_ISForFaces+n_ISForEdges; 3321 total_counts *= max_constraints; 3322 total_counts += n_vertices; 3323 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3324 3325 total_counts = 0; 3326 max_size_of_constraint = 0; 3327 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3328 IS used_is; 3329 if (i<n_ISForEdges) { 3330 used_is = ISForEdges[i]; 3331 } else { 3332 used_is = ISForFaces[i-n_ISForEdges]; 3333 } 3334 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3335 total_counts += j; 3336 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3337 } 3338 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3339 3340 /* get local part of global near null space vectors */ 3341 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3342 for (k=0;k<nnsp_size;k++) { 3343 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3344 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3345 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3346 } 3347 3348 /* whether or not to skip lapack calls */ 3349 skip_lapack = PETSC_TRUE; 3350 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3351 3352 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3353 if (!skip_lapack) { 3354 PetscScalar temp_work; 3355 3356 #if defined(PETSC_MISSING_LAPACK_GESVD) 3357 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3358 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3359 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3360 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3361 #if defined(PETSC_USE_COMPLEX) 3362 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3363 #endif 3364 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3365 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3366 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3367 lwork = -1; 3368 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3369 #if !defined(PETSC_USE_COMPLEX) 3370 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3371 #else 3372 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3373 #endif 3374 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3375 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3376 #else /* on missing GESVD */ 3377 /* SVD */ 3378 PetscInt max_n,min_n; 3379 max_n = max_size_of_constraint; 3380 min_n = max_constraints; 3381 if (max_size_of_constraint < max_constraints) { 3382 min_n = max_size_of_constraint; 3383 max_n = max_constraints; 3384 } 3385 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3386 #if defined(PETSC_USE_COMPLEX) 3387 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3388 #endif 3389 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3390 lwork = -1; 3391 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3392 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3393 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3394 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3395 #if !defined(PETSC_USE_COMPLEX) 3396 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3397 #else 3398 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3399 #endif 3400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3402 #endif /* on missing GESVD */ 3403 /* Allocate optimal workspace */ 3404 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3405 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3406 } 3407 /* Now we can loop on constraining sets */ 3408 total_counts = 0; 3409 constraints_idxs_ptr[0] = 0; 3410 constraints_data_ptr[0] = 0; 3411 /* vertices */ 3412 if (n_vertices) { 3413 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3414 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3415 for (i=0;i<n_vertices;i++) { 3416 constraints_n[total_counts] = 1; 3417 constraints_data[total_counts] = 1.0; 3418 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3419 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3420 total_counts++; 3421 } 3422 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3423 n_vertices = total_counts; 3424 } 3425 3426 /* edges and faces */ 3427 total_counts_cc = total_counts; 3428 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3429 IS used_is; 3430 PetscBool idxs_copied = PETSC_FALSE; 3431 3432 if (ncc<n_ISForEdges) { 3433 used_is = ISForEdges[ncc]; 3434 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3435 } else { 3436 used_is = ISForFaces[ncc-n_ISForEdges]; 3437 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3438 } 3439 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3440 3441 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3442 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3443 /* change of basis should not be performed on local periodic nodes */ 3444 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3445 if (nnsp_has_cnst) { 3446 PetscScalar quad_value; 3447 3448 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3449 idxs_copied = PETSC_TRUE; 3450 3451 if (!pcbddc->use_nnsp_true) { 3452 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3453 } else { 3454 quad_value = 1.0; 3455 } 3456 for (j=0;j<size_of_constraint;j++) { 3457 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3458 } 3459 temp_constraints++; 3460 total_counts++; 3461 } 3462 for (k=0;k<nnsp_size;k++) { 3463 PetscReal real_value; 3464 PetscScalar *ptr_to_data; 3465 3466 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3467 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3468 for (j=0;j<size_of_constraint;j++) { 3469 ptr_to_data[j] = array[is_indices[j]]; 3470 } 3471 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3472 /* check if array is null on the connected component */ 3473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3474 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3475 if (real_value > 0.0) { /* keep indices and values */ 3476 temp_constraints++; 3477 total_counts++; 3478 if (!idxs_copied) { 3479 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3480 idxs_copied = PETSC_TRUE; 3481 } 3482 } 3483 } 3484 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3485 valid_constraints = temp_constraints; 3486 if (!pcbddc->use_nnsp_true && temp_constraints) { 3487 if (temp_constraints == 1) { /* just normalize the constraint */ 3488 PetscScalar norm,*ptr_to_data; 3489 3490 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3492 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3493 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3494 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3495 } else { /* perform SVD */ 3496 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3497 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3498 3499 #if defined(PETSC_MISSING_LAPACK_GESVD) 3500 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3501 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3502 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3503 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3504 from that computed using LAPACKgesvd 3505 -> This is due to a different computation of eigenvectors in LAPACKheev 3506 -> The quality of the POD-computed basis will be the same */ 3507 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3508 /* Store upper triangular part of correlation matrix */ 3509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3510 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3511 for (j=0;j<temp_constraints;j++) { 3512 for (k=0;k<j+1;k++) { 3513 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3514 } 3515 } 3516 /* compute eigenvalues and eigenvectors of correlation matrix */ 3517 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3518 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3519 #if !defined(PETSC_USE_COMPLEX) 3520 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3521 #else 3522 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3523 #endif 3524 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3526 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3527 j = 0; 3528 while (j < temp_constraints && singular_vals[j] < tol) j++; 3529 total_counts = total_counts-j; 3530 valid_constraints = temp_constraints-j; 3531 /* scale and copy POD basis into used quadrature memory */ 3532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3533 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3534 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3536 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3538 if (j<temp_constraints) { 3539 PetscInt ii; 3540 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3541 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3542 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3544 for (k=0;k<temp_constraints-j;k++) { 3545 for (ii=0;ii<size_of_constraint;ii++) { 3546 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3547 } 3548 } 3549 } 3550 #else /* on missing GESVD */ 3551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3552 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3554 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3555 #if !defined(PETSC_USE_COMPLEX) 3556 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3557 #else 3558 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3559 #endif 3560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3561 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3562 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3563 k = temp_constraints; 3564 if (k > size_of_constraint) k = size_of_constraint; 3565 j = 0; 3566 while (j < k && singular_vals[k-j-1] < tol) j++; 3567 valid_constraints = k-j; 3568 total_counts = total_counts-temp_constraints+valid_constraints; 3569 #endif /* on missing GESVD */ 3570 } 3571 } 3572 /* update pointers information */ 3573 if (valid_constraints) { 3574 constraints_n[total_counts_cc] = valid_constraints; 3575 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3576 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3577 /* set change_of_basis flag */ 3578 if (boolforchange) { 3579 PetscBTSet(change_basis,total_counts_cc); 3580 } 3581 total_counts_cc++; 3582 } 3583 } 3584 /* free workspace */ 3585 if (!skip_lapack) { 3586 ierr = PetscFree(work);CHKERRQ(ierr); 3587 #if defined(PETSC_USE_COMPLEX) 3588 ierr = PetscFree(rwork);CHKERRQ(ierr); 3589 #endif 3590 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3591 #if defined(PETSC_MISSING_LAPACK_GESVD) 3592 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3593 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3594 #endif 3595 } 3596 for (k=0;k<nnsp_size;k++) { 3597 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3598 } 3599 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3600 /* free index sets of faces, edges and vertices */ 3601 for (i=0;i<n_ISForFaces;i++) { 3602 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3603 } 3604 if (n_ISForFaces) { 3605 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3606 } 3607 for (i=0;i<n_ISForEdges;i++) { 3608 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3609 } 3610 if (n_ISForEdges) { 3611 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3612 } 3613 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3614 } else { 3615 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3616 3617 total_counts = 0; 3618 n_vertices = 0; 3619 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3620 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3621 } 3622 max_constraints = 0; 3623 total_counts_cc = 0; 3624 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3625 total_counts += pcbddc->adaptive_constraints_n[i]; 3626 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3627 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3628 } 3629 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3630 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3631 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3632 constraints_data = pcbddc->adaptive_constraints_data; 3633 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3634 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3635 total_counts_cc = 0; 3636 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3637 if (pcbddc->adaptive_constraints_n[i]) { 3638 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3639 } 3640 } 3641 #if 0 3642 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3643 for (i=0;i<total_counts_cc;i++) { 3644 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3645 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3646 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3647 printf(" %d",constraints_idxs[j]); 3648 } 3649 printf("\n"); 3650 printf("number of cc: %d\n",constraints_n[i]); 3651 } 3652 for (i=0;i<n_vertices;i++) { 3653 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3654 } 3655 for (i=0;i<sub_schurs->n_subs;i++) { 3656 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3657 } 3658 #endif 3659 3660 max_size_of_constraint = 0; 3661 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3662 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3663 /* Change of basis */ 3664 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3665 if (pcbddc->use_change_of_basis) { 3666 for (i=0;i<sub_schurs->n_subs;i++) { 3667 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3668 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3669 } 3670 } 3671 } 3672 } 3673 pcbddc->local_primal_size = total_counts; 3674 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3675 3676 /* map constraints_idxs in boundary numbering */ 3677 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3678 if (i != constraints_idxs_ptr[total_counts_cc]) { 3679 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3680 } 3681 3682 /* Create constraint matrix */ 3683 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3684 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3685 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3686 3687 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3688 /* determine if a QR strategy is needed for change of basis */ 3689 qr_needed = PETSC_FALSE; 3690 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3691 total_primal_vertices=0; 3692 pcbddc->local_primal_size_cc = 0; 3693 for (i=0;i<total_counts_cc;i++) { 3694 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3695 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3696 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3697 pcbddc->local_primal_size_cc += 1; 3698 } else if (PetscBTLookup(change_basis,i)) { 3699 for (k=0;k<constraints_n[i];k++) { 3700 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3701 } 3702 pcbddc->local_primal_size_cc += constraints_n[i]; 3703 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3704 PetscBTSet(qr_needed_idx,i); 3705 qr_needed = PETSC_TRUE; 3706 } 3707 } else { 3708 pcbddc->local_primal_size_cc += 1; 3709 } 3710 } 3711 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3712 pcbddc->n_vertices = total_primal_vertices; 3713 /* permute indices in order to have a sorted set of vertices */ 3714 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3715 3716 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3717 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3718 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3719 3720 /* nonzero structure of constraint matrix */ 3721 /* and get reference dof for local constraints */ 3722 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3723 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3724 3725 j = total_primal_vertices; 3726 total_counts = total_primal_vertices; 3727 cum = total_primal_vertices; 3728 for (i=n_vertices;i<total_counts_cc;i++) { 3729 if (!PetscBTLookup(change_basis,i)) { 3730 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3731 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3732 cum++; 3733 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3734 for (k=0;k<constraints_n[i];k++) { 3735 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3736 nnz[j+k] = size_of_constraint; 3737 } 3738 j += constraints_n[i]; 3739 } 3740 } 3741 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3742 ierr = PetscFree(nnz);CHKERRQ(ierr); 3743 3744 /* set values in constraint matrix */ 3745 for (i=0;i<total_primal_vertices;i++) { 3746 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3747 } 3748 total_counts = total_primal_vertices; 3749 for (i=n_vertices;i<total_counts_cc;i++) { 3750 if (!PetscBTLookup(change_basis,i)) { 3751 PetscInt *cols; 3752 3753 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3754 cols = constraints_idxs+constraints_idxs_ptr[i]; 3755 for (k=0;k<constraints_n[i];k++) { 3756 PetscInt row = total_counts+k; 3757 PetscScalar *vals; 3758 3759 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3760 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3761 } 3762 total_counts += constraints_n[i]; 3763 } 3764 } 3765 /* assembling */ 3766 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 3769 /* 3770 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3771 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3772 */ 3773 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3774 if (pcbddc->use_change_of_basis) { 3775 /* dual and primal dofs on a single cc */ 3776 PetscInt dual_dofs,primal_dofs; 3777 /* working stuff for GEQRF */ 3778 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3779 PetscBLASInt lqr_work; 3780 /* working stuff for UNGQR */ 3781 PetscScalar *gqr_work,lgqr_work_t; 3782 PetscBLASInt lgqr_work; 3783 /* working stuff for TRTRS */ 3784 PetscScalar *trs_rhs; 3785 PetscBLASInt Blas_NRHS; 3786 /* pointers for values insertion into change of basis matrix */ 3787 PetscInt *start_rows,*start_cols; 3788 PetscScalar *start_vals; 3789 /* working stuff for values insertion */ 3790 PetscBT is_primal; 3791 PetscInt *aux_primal_numbering_B; 3792 /* matrix sizes */ 3793 PetscInt global_size,local_size; 3794 /* temporary change of basis */ 3795 Mat localChangeOfBasisMatrix; 3796 /* extra space for debugging */ 3797 PetscScalar *dbg_work; 3798 3799 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3800 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3801 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3802 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3803 /* nonzeros for local mat */ 3804 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3805 for (i=0;i<pcis->n;i++) nnz[i]=1; 3806 for (i=n_vertices;i<total_counts_cc;i++) { 3807 if (PetscBTLookup(change_basis,i)) { 3808 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3809 if (PetscBTLookup(qr_needed_idx,i)) { 3810 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3811 } else { 3812 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3813 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3814 } 3815 } 3816 } 3817 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3818 ierr = PetscFree(nnz);CHKERRQ(ierr); 3819 /* Set initial identity in the matrix */ 3820 for (i=0;i<pcis->n;i++) { 3821 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3822 } 3823 3824 if (pcbddc->dbg_flag) { 3825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3827 } 3828 3829 3830 /* Now we loop on the constraints which need a change of basis */ 3831 /* 3832 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3833 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3834 3835 Basic blocks of change of basis matrix T computed by 3836 3837 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3838 3839 | 1 0 ... 0 s_1/S | 3840 | 0 1 ... 0 s_2/S | 3841 | ... | 3842 | 0 ... 1 s_{n-1}/S | 3843 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3844 3845 with S = \sum_{i=1}^n s_i^2 3846 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3847 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3848 3849 - QR decomposition of constraints otherwise 3850 */ 3851 if (qr_needed) { 3852 /* space to store Q */ 3853 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3854 /* first we issue queries for optimal work */ 3855 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3856 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3857 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3858 lqr_work = -1; 3859 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3860 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3861 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3862 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3863 lgqr_work = -1; 3864 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3865 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3866 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3867 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3868 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3869 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3870 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3871 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3872 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3873 /* array to store scaling factors for reflectors */ 3874 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3875 /* array to store rhs and solution of triangular solver */ 3876 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3877 /* allocating workspace for check */ 3878 if (pcbddc->dbg_flag) { 3879 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3880 } 3881 } 3882 /* array to store whether a node is primal or not */ 3883 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3884 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3885 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3886 if (i != total_primal_vertices) { 3887 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3888 } 3889 for (i=0;i<total_primal_vertices;i++) { 3890 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3891 } 3892 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3893 3894 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3895 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3896 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3897 if (PetscBTLookup(change_basis,total_counts)) { 3898 /* get constraint info */ 3899 primal_dofs = constraints_n[total_counts]; 3900 dual_dofs = size_of_constraint-primal_dofs; 3901 3902 if (pcbddc->dbg_flag) { 3903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3904 } 3905 3906 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3907 3908 /* copy quadrature constraints for change of basis check */ 3909 if (pcbddc->dbg_flag) { 3910 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3911 } 3912 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3913 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3914 3915 /* compute QR decomposition of constraints */ 3916 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3917 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3920 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3921 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3922 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3923 3924 /* explictly compute R^-T */ 3925 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3926 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3927 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3928 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3930 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3931 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3932 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3933 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3934 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3935 3936 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3939 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3942 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3943 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3944 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3945 3946 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3947 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3948 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3949 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3950 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3951 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3952 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3953 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3955 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3956 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3957 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3958 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3959 3960 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3961 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3962 /* insert cols for primal dofs */ 3963 for (j=0;j<primal_dofs;j++) { 3964 start_vals = &qr_basis[j*size_of_constraint]; 3965 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3966 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3967 } 3968 /* insert cols for dual dofs */ 3969 for (j=0,k=0;j<dual_dofs;k++) { 3970 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3971 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3972 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3973 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3974 j++; 3975 } 3976 } 3977 3978 /* check change of basis */ 3979 if (pcbddc->dbg_flag) { 3980 PetscInt ii,jj; 3981 PetscBool valid_qr=PETSC_TRUE; 3982 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3988 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3989 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3990 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3991 for (jj=0;jj<size_of_constraint;jj++) { 3992 for (ii=0;ii<primal_dofs;ii++) { 3993 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3994 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3995 } 3996 } 3997 if (!valid_qr) { 3998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3999 for (jj=0;jj<size_of_constraint;jj++) { 4000 for (ii=0;ii<primal_dofs;ii++) { 4001 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4002 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4003 } 4004 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4005 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4006 } 4007 } 4008 } 4009 } else { 4010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4011 } 4012 } 4013 } else { /* simple transformation block */ 4014 PetscInt row,col; 4015 PetscScalar val,norm; 4016 4017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4018 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4019 for (j=0;j<size_of_constraint;j++) { 4020 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4021 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4022 if (!PetscBTLookup(is_primal,row_B)) { 4023 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4024 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4025 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4026 } else { 4027 for (k=0;k<size_of_constraint;k++) { 4028 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4029 if (row != col) { 4030 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4031 } else { 4032 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4033 } 4034 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4035 } 4036 } 4037 } 4038 if (pcbddc->dbg_flag) { 4039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4040 } 4041 } 4042 } else { 4043 if (pcbddc->dbg_flag) { 4044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4045 } 4046 } 4047 } 4048 4049 /* free workspace */ 4050 if (qr_needed) { 4051 if (pcbddc->dbg_flag) { 4052 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4053 } 4054 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4055 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4056 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4057 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4058 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4059 } 4060 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4061 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4062 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4063 4064 /* assembling of global change of variable */ 4065 if (!pcbddc->fake_change) { 4066 Mat tmat; 4067 PetscInt bs; 4068 4069 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4070 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4071 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4072 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4073 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4074 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4075 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4076 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4077 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4078 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4079 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4080 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4081 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4082 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4084 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4085 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4086 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4087 4088 /* check */ 4089 if (pcbddc->dbg_flag) { 4090 PetscReal error; 4091 Vec x,x_change; 4092 4093 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4094 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4095 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4096 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4097 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4098 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4099 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4100 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4101 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4102 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4103 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4104 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4107 ierr = VecDestroy(&x);CHKERRQ(ierr); 4108 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4109 } 4110 /* adapt sub_schurs computed (if any) */ 4111 if (pcbddc->use_deluxe_scaling) { 4112 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4113 4114 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4115 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4116 } 4117 if (sub_schurs->S_Ej_all) { 4118 Mat S_new,tmat; 4119 IS is_all_N,is_V_Sall = NULL; 4120 4121 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4122 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4123 if (pcbddc->deluxe_zerorows) { 4124 ISLocalToGlobalMapping NtoSall; 4125 IS is_V; 4126 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4127 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4128 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4129 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4130 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4131 } 4132 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4133 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4134 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4135 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4136 if (pcbddc->deluxe_zerorows) { 4137 const PetscScalar *array; 4138 const PetscInt *idxs_V,*idxs_all; 4139 PetscInt i,n_V; 4140 4141 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4142 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4143 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4144 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4145 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4146 for (i=0;i<n_V;i++) { 4147 PetscScalar val; 4148 PetscInt idx; 4149 4150 idx = idxs_V[i]; 4151 val = array[idxs_all[idxs_V[i]]]; 4152 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4153 } 4154 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4157 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4158 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4159 } 4160 sub_schurs->S_Ej_all = S_new; 4161 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4162 if (sub_schurs->sum_S_Ej_all) { 4163 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4164 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4165 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4166 if (pcbddc->deluxe_zerorows) { 4167 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4168 } 4169 sub_schurs->sum_S_Ej_all = S_new; 4170 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4171 } 4172 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4173 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4174 } 4175 } 4176 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4177 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4178 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4179 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4180 pcbddc->use_qr_single = qr_needed; 4181 } 4182 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4183 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4184 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4185 } 4186 4187 /* set up change of basis context */ 4188 if (pcbddc->ChangeOfBasisMatrix) { 4189 PCBDDCChange_ctx change_ctx; 4190 4191 if (!pcbddc->new_global_mat) { 4192 PetscInt global_size,local_size; 4193 4194 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4195 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4196 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4197 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4198 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4199 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4200 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4201 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4202 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4203 } else { 4204 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4205 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4206 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4207 } 4208 if (!pcbddc->user_ChangeOfBasisMatrix) { 4209 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4210 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4211 } else { 4212 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4213 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4214 } 4215 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4216 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4217 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4219 } else { 4220 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4221 } 4222 4223 if (!pcbddc->fake_change) { 4224 /* add pressure dofs to set of primal nodes for numbering purposes */ 4225 for (i=0;i<pcbddc->benign_n;i++) { 4226 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4227 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4228 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4229 pcbddc->local_primal_size_cc++; 4230 pcbddc->local_primal_size++; 4231 } 4232 4233 /* check if a new primal space has been introduced (also take into account benign trick) */ 4234 pcbddc->new_primal_space_local = PETSC_TRUE; 4235 if (olocal_primal_size == pcbddc->local_primal_size) { 4236 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4237 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4238 if (!pcbddc->new_primal_space_local) { 4239 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4240 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4241 } 4242 } 4243 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4244 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4245 } 4246 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4247 4248 /* flush dbg viewer */ 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4251 } 4252 4253 /* free workspace */ 4254 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4255 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4256 if (!pcbddc->adaptive_selection) { 4257 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4258 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4259 } else { 4260 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4261 pcbddc->adaptive_constraints_idxs_ptr, 4262 pcbddc->adaptive_constraints_data_ptr, 4263 pcbddc->adaptive_constraints_idxs, 4264 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4265 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4266 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4267 } 4268 PetscFunctionReturn(0); 4269 } 4270 4271 #undef __FUNCT__ 4272 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4273 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4274 { 4275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4276 PC_IS *pcis = (PC_IS*)pc->data; 4277 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4278 PetscInt ierr,i,vertex_size,N; 4279 PetscViewer viewer=pcbddc->dbg_viewer; 4280 4281 PetscFunctionBegin; 4282 /* Reset previously computed graph */ 4283 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4284 /* Init local Graph struct */ 4285 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4286 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4287 4288 /* Check validity of the csr graph passed in by the user */ 4289 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4291 } 4292 4293 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4294 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4295 PetscInt *xadj,*adjncy; 4296 PetscInt nvtxs; 4297 PetscBool flg_row=PETSC_FALSE; 4298 4299 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4300 if (flg_row) { 4301 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4302 pcbddc->computed_rowadj = PETSC_TRUE; 4303 } 4304 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4305 } 4306 if (pcbddc->dbg_flag) { 4307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4308 } 4309 4310 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4311 vertex_size = 1; 4312 if (pcbddc->user_provided_isfordofs) { 4313 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4314 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4315 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4316 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4317 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4318 } 4319 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4320 pcbddc->n_ISForDofs = 0; 4321 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4322 } 4323 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4324 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4325 } else { 4326 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4327 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4328 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4329 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4330 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4331 } 4332 } 4333 } 4334 4335 /* Setup of Graph */ 4336 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4337 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4338 } 4339 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4340 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4341 } 4342 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4343 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4344 } 4345 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4346 4347 /* attach info on disconnected subdomains if present */ 4348 if (pcbddc->n_local_subs) { 4349 PetscInt *local_subs; 4350 4351 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4352 for (i=0;i<pcbddc->n_local_subs;i++) { 4353 const PetscInt *idxs; 4354 PetscInt nl,j; 4355 4356 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4357 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4358 for (j=0;j<nl;j++) { 4359 local_subs[idxs[j]] = i; 4360 } 4361 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4362 } 4363 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4364 pcbddc->mat_graph->local_subs = local_subs; 4365 } 4366 4367 /* Graph's connected components analysis */ 4368 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4369 4370 /* print some info to stdout */ 4371 if (pcbddc->dbg_flag) { 4372 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4373 } 4374 PetscFunctionReturn(0); 4375 } 4376 4377 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "PCBDDCSubsetNumbering" 4380 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4381 { 4382 PetscSF sf; 4383 PetscLayout map; 4384 const PetscInt *idxs; 4385 PetscInt *leaf_data,*root_data,*gidxs; 4386 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4387 PetscInt n_n,nlocals,start,first_index; 4388 PetscMPIInt commsize; 4389 PetscBool first_found; 4390 PetscErrorCode ierr; 4391 4392 PetscFunctionBegin; 4393 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4394 if (subset_mult) { 4395 PetscCheckSameComm(subset,1,subset_mult,2); 4396 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4397 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4398 } 4399 /* create workspace layout for computing global indices of subset */ 4400 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4401 lbounds[0] = lbounds[1] = 0; 4402 for (i=0;i<n;i++) { 4403 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4404 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4405 } 4406 lbounds[0] = -lbounds[0]; 4407 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4408 gbounds[0] = -gbounds[0]; 4409 N = gbounds[1] - gbounds[0] + 1; 4410 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4411 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4412 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4413 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4414 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4415 4416 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4417 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4418 if (subset_mult) { 4419 const PetscInt* idxs_mult; 4420 4421 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4422 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4423 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4424 } else { 4425 for (i=0;i<n;i++) leaf_data[i] = 1; 4426 } 4427 /* local size of new subset */ 4428 n_n = 0; 4429 for (i=0;i<n;i++) n_n += leaf_data[i]; 4430 4431 /* global indexes in layout */ 4432 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4433 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4434 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4435 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4436 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4437 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4438 4439 /* reduce from leaves to roots */ 4440 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4441 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4442 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4443 4444 /* count indexes in local part of layout */ 4445 nlocals = 0; 4446 first_index = -1; 4447 first_found = PETSC_FALSE; 4448 for (i=0;i<Nl;i++) { 4449 if (!first_found && root_data[i]) { 4450 first_found = PETSC_TRUE; 4451 first_index = i; 4452 } 4453 nlocals += root_data[i]; 4454 } 4455 4456 /* cumulative of number of indexes and size of subset without holes */ 4457 #if defined(PETSC_HAVE_MPI_EXSCAN) 4458 start = 0; 4459 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4460 #else 4461 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4462 start = start-nlocals; 4463 #endif 4464 4465 if (N_n) { /* compute total size of new subset if requested */ 4466 *N_n = start + nlocals; 4467 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4468 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4469 } 4470 4471 /* adapt root data with cumulative */ 4472 if (first_found) { 4473 PetscInt old_index; 4474 4475 root_data[first_index] += start; 4476 old_index = first_index; 4477 for (i=first_index+1;i<Nl;i++) { 4478 if (root_data[i]) { 4479 root_data[i] += root_data[old_index]; 4480 old_index = i; 4481 } 4482 } 4483 } 4484 4485 /* from roots to leaves */ 4486 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4487 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4488 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4489 4490 /* create new IS with global indexes without holes */ 4491 if (subset_mult) { 4492 const PetscInt* idxs_mult; 4493 PetscInt cum; 4494 4495 cum = 0; 4496 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4497 for (i=0;i<n;i++) { 4498 PetscInt j; 4499 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4500 } 4501 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4502 } else { 4503 for (i=0;i<n;i++) { 4504 gidxs[i] = leaf_data[i]-1; 4505 } 4506 } 4507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4508 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4514 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4515 { 4516 PetscInt i,j; 4517 PetscScalar *alphas; 4518 PetscErrorCode ierr; 4519 4520 PetscFunctionBegin; 4521 /* this implements stabilized Gram-Schmidt */ 4522 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4523 for (i=0;i<n;i++) { 4524 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4525 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4526 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4527 } 4528 ierr = PetscFree(alphas);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 #undef __FUNCT__ 4533 #define __FUNCT__ "MatISGetSubassemblingPattern" 4534 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4535 { 4536 IS ranks_send_to; 4537 PetscInt n_neighs,*neighs,*n_shared,**shared; 4538 PetscMPIInt size,rank,color; 4539 PetscInt *xadj,*adjncy; 4540 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4541 PetscInt i,local_size,threshold=0; 4542 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4543 PetscSubcomm subcomm; 4544 PetscErrorCode ierr; 4545 4546 PetscFunctionBegin; 4547 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4548 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4549 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4550 4551 /* Get info on mapping */ 4552 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4553 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4554 4555 /* build local CSR graph of subdomains' connectivity */ 4556 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4557 xadj[0] = 0; 4558 xadj[1] = PetscMax(n_neighs-1,0); 4559 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4560 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4561 4562 if (threshold) { 4563 PetscInt xadj_count = 0; 4564 for (i=1;i<n_neighs;i++) { 4565 if (n_shared[i] > threshold) { 4566 adjncy[xadj_count] = neighs[i]; 4567 adjncy_wgt[xadj_count] = n_shared[i]; 4568 xadj_count++; 4569 } 4570 } 4571 xadj[1] = xadj_count; 4572 } else { 4573 if (xadj[1]) { 4574 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4575 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4576 } 4577 } 4578 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4579 if (use_square) { 4580 for (i=0;i<xadj[1];i++) { 4581 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4582 } 4583 } 4584 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4585 4586 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4587 4588 /* 4589 Restrict work on active processes only. 4590 */ 4591 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4592 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4593 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4594 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4595 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4596 if (color) { 4597 ierr = PetscFree(xadj);CHKERRQ(ierr); 4598 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4599 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4600 } else { 4601 Mat subdomain_adj; 4602 IS new_ranks,new_ranks_contig; 4603 MatPartitioning partitioner; 4604 PetscInt prank,rstart=0,rend=0; 4605 PetscInt *is_indices,*oldranks; 4606 PetscBool aggregate; 4607 4608 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4609 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4610 prank = rank; 4611 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4612 /* 4613 for (i=0;i<size;i++) { 4614 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4615 } 4616 */ 4617 for (i=0;i<xadj[1];i++) { 4618 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4619 } 4620 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4621 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4622 if (aggregate) { 4623 PetscInt lrows,row,ncols,*cols; 4624 PetscMPIInt nrank; 4625 PetscScalar *vals; 4626 4627 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4628 lrows = 0; 4629 if (nrank<redprocs) { 4630 lrows = size/redprocs; 4631 if (nrank<size%redprocs) lrows++; 4632 } 4633 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4634 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4635 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4636 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4637 row = nrank; 4638 ncols = xadj[1]-xadj[0]; 4639 cols = adjncy; 4640 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4641 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4642 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4643 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4644 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4645 ierr = PetscFree(xadj);CHKERRQ(ierr); 4646 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4647 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4648 ierr = PetscFree(vals);CHKERRQ(ierr); 4649 } else { 4650 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4651 } 4652 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4653 4654 /* Partition */ 4655 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4656 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4657 if (use_vwgt) { 4658 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4659 v_wgt[0] = local_size; 4660 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4661 } 4662 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4663 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4664 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4665 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4666 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4667 4668 /* renumber new_ranks to avoid "holes" in new set of processors */ 4669 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4670 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4671 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4672 if (!redprocs) { 4673 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4674 } else { 4675 PetscInt idxs[1]; 4676 PetscMPIInt tag; 4677 MPI_Request *reqs; 4678 4679 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4680 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4681 for (i=rstart;i<rend;i++) { 4682 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4683 } 4684 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4685 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4686 ierr = PetscFree(reqs);CHKERRQ(ierr); 4687 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4688 } 4689 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4690 /* clean up */ 4691 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4692 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4693 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4694 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4695 } 4696 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4697 4698 /* assemble parallel IS for sends */ 4699 i = 1; 4700 if (color) i=0; 4701 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4702 /* get back IS */ 4703 *is_sends = ranks_send_to; 4704 PetscFunctionReturn(0); 4705 } 4706 4707 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4708 4709 #undef __FUNCT__ 4710 #define __FUNCT__ "MatISSubassemble" 4711 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4712 { 4713 Mat local_mat; 4714 IS is_sends_internal; 4715 PetscInt rows,cols,new_local_rows; 4716 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4717 PetscBool ismatis,isdense,newisdense,destroy_mat; 4718 ISLocalToGlobalMapping l2gmap; 4719 PetscInt* l2gmap_indices; 4720 const PetscInt* is_indices; 4721 MatType new_local_type; 4722 /* buffers */ 4723 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4724 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4725 PetscInt *recv_buffer_idxs_local; 4726 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4727 /* MPI */ 4728 MPI_Comm comm,comm_n; 4729 PetscSubcomm subcomm; 4730 PetscMPIInt n_sends,n_recvs,commsize; 4731 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4732 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4733 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4734 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4735 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4736 PetscErrorCode ierr; 4737 4738 PetscFunctionBegin; 4739 /* TODO: add missing checks */ 4740 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4741 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4742 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4743 PetscValidLogicalCollectiveInt(mat,nis,7); 4744 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4745 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4746 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4747 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4748 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4749 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4750 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4751 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4752 PetscInt mrows,mcols,mnrows,mncols; 4753 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4754 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4755 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4756 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4757 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4758 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4759 } 4760 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4761 PetscValidLogicalCollectiveInt(mat,bs,0); 4762 /* prepare IS for sending if not provided */ 4763 if (!is_sends) { 4764 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4765 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4766 } else { 4767 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4768 is_sends_internal = is_sends; 4769 } 4770 4771 /* get comm */ 4772 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4773 4774 /* compute number of sends */ 4775 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4776 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4777 4778 /* compute number of receives */ 4779 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4780 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4781 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4782 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4783 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4784 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4785 ierr = PetscFree(iflags);CHKERRQ(ierr); 4786 4787 /* restrict comm if requested */ 4788 subcomm = 0; 4789 destroy_mat = PETSC_FALSE; 4790 if (restrict_comm) { 4791 PetscMPIInt color,subcommsize; 4792 4793 color = 0; 4794 if (restrict_full) { 4795 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4796 } else { 4797 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4798 } 4799 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4800 subcommsize = commsize - subcommsize; 4801 /* check if reuse has been requested */ 4802 if (reuse == MAT_REUSE_MATRIX) { 4803 if (*mat_n) { 4804 PetscMPIInt subcommsize2; 4805 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4806 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4807 comm_n = PetscObjectComm((PetscObject)*mat_n); 4808 } else { 4809 comm_n = PETSC_COMM_SELF; 4810 } 4811 } else { /* MAT_INITIAL_MATRIX */ 4812 PetscMPIInt rank; 4813 4814 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4815 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4816 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4817 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4818 comm_n = PetscSubcommChild(subcomm); 4819 } 4820 /* flag to destroy *mat_n if not significative */ 4821 if (color) destroy_mat = PETSC_TRUE; 4822 } else { 4823 comm_n = comm; 4824 } 4825 4826 /* prepare send/receive buffers */ 4827 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4828 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4829 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4830 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4831 if (nis) { 4832 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4833 } 4834 4835 /* Get data from local matrices */ 4836 if (!isdense) { 4837 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4838 /* TODO: See below some guidelines on how to prepare the local buffers */ 4839 /* 4840 send_buffer_vals should contain the raw values of the local matrix 4841 send_buffer_idxs should contain: 4842 - MatType_PRIVATE type 4843 - PetscInt size_of_l2gmap 4844 - PetscInt global_row_indices[size_of_l2gmap] 4845 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4846 */ 4847 } else { 4848 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4849 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4850 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4851 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4852 send_buffer_idxs[1] = i; 4853 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4854 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4855 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4856 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4857 for (i=0;i<n_sends;i++) { 4858 ilengths_vals[is_indices[i]] = len*len; 4859 ilengths_idxs[is_indices[i]] = len+2; 4860 } 4861 } 4862 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4863 /* additional is (if any) */ 4864 if (nis) { 4865 PetscMPIInt psum; 4866 PetscInt j; 4867 for (j=0,psum=0;j<nis;j++) { 4868 PetscInt plen; 4869 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4870 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4871 psum += len+1; /* indices + lenght */ 4872 } 4873 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4874 for (j=0,psum=0;j<nis;j++) { 4875 PetscInt plen; 4876 const PetscInt *is_array_idxs; 4877 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4878 send_buffer_idxs_is[psum] = plen; 4879 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4880 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4881 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4882 psum += plen+1; /* indices + lenght */ 4883 } 4884 for (i=0;i<n_sends;i++) { 4885 ilengths_idxs_is[is_indices[i]] = psum; 4886 } 4887 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4888 } 4889 4890 buf_size_idxs = 0; 4891 buf_size_vals = 0; 4892 buf_size_idxs_is = 0; 4893 for (i=0;i<n_recvs;i++) { 4894 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4895 buf_size_vals += (PetscInt)olengths_vals[i]; 4896 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4897 } 4898 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4899 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4900 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4901 4902 /* get new tags for clean communications */ 4903 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4904 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4905 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4906 4907 /* allocate for requests */ 4908 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4909 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4910 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4911 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4912 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4913 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4914 4915 /* communications */ 4916 ptr_idxs = recv_buffer_idxs; 4917 ptr_vals = recv_buffer_vals; 4918 ptr_idxs_is = recv_buffer_idxs_is; 4919 for (i=0;i<n_recvs;i++) { 4920 source_dest = onodes[i]; 4921 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4922 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4923 ptr_idxs += olengths_idxs[i]; 4924 ptr_vals += olengths_vals[i]; 4925 if (nis) { 4926 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4927 ptr_idxs_is += olengths_idxs_is[i]; 4928 } 4929 } 4930 for (i=0;i<n_sends;i++) { 4931 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4932 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4933 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4934 if (nis) { 4935 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4936 } 4937 } 4938 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4939 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4940 4941 /* assemble new l2g map */ 4942 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4943 ptr_idxs = recv_buffer_idxs; 4944 new_local_rows = 0; 4945 for (i=0;i<n_recvs;i++) { 4946 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4947 ptr_idxs += olengths_idxs[i]; 4948 } 4949 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4950 ptr_idxs = recv_buffer_idxs; 4951 new_local_rows = 0; 4952 for (i=0;i<n_recvs;i++) { 4953 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4954 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4955 ptr_idxs += olengths_idxs[i]; 4956 } 4957 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4958 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4959 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4960 4961 /* infer new local matrix type from received local matrices type */ 4962 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4963 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4964 if (n_recvs) { 4965 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4966 ptr_idxs = recv_buffer_idxs; 4967 for (i=0;i<n_recvs;i++) { 4968 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4969 new_local_type_private = MATAIJ_PRIVATE; 4970 break; 4971 } 4972 ptr_idxs += olengths_idxs[i]; 4973 } 4974 switch (new_local_type_private) { 4975 case MATDENSE_PRIVATE: 4976 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4977 new_local_type = MATSEQAIJ; 4978 bs = 1; 4979 } else { /* if I receive only 1 dense matrix */ 4980 new_local_type = MATSEQDENSE; 4981 bs = 1; 4982 } 4983 break; 4984 case MATAIJ_PRIVATE: 4985 new_local_type = MATSEQAIJ; 4986 bs = 1; 4987 break; 4988 case MATBAIJ_PRIVATE: 4989 new_local_type = MATSEQBAIJ; 4990 break; 4991 case MATSBAIJ_PRIVATE: 4992 new_local_type = MATSEQSBAIJ; 4993 break; 4994 default: 4995 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4996 break; 4997 } 4998 } else { /* by default, new_local_type is seqdense */ 4999 new_local_type = MATSEQDENSE; 5000 bs = 1; 5001 } 5002 5003 /* create MATIS object if needed */ 5004 if (reuse == MAT_INITIAL_MATRIX) { 5005 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5006 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5007 } else { 5008 /* it also destroys the local matrices */ 5009 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5010 } 5011 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5012 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5013 5014 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5015 5016 /* Global to local map of received indices */ 5017 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5018 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5019 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5020 5021 /* restore attributes -> type of incoming data and its size */ 5022 buf_size_idxs = 0; 5023 for (i=0;i<n_recvs;i++) { 5024 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5025 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5026 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5027 } 5028 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5029 5030 /* set preallocation */ 5031 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5032 if (!newisdense) { 5033 PetscInt *new_local_nnz=0; 5034 5035 ptr_vals = recv_buffer_vals; 5036 ptr_idxs = recv_buffer_idxs_local; 5037 if (n_recvs) { 5038 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5039 } 5040 for (i=0;i<n_recvs;i++) { 5041 PetscInt j; 5042 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5043 for (j=0;j<*(ptr_idxs+1);j++) { 5044 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5045 } 5046 } else { 5047 /* TODO */ 5048 } 5049 ptr_idxs += olengths_idxs[i]; 5050 } 5051 if (new_local_nnz) { 5052 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5053 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5054 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5055 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5056 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5057 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5058 } else { 5059 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5060 } 5061 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5062 } else { 5063 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5064 } 5065 5066 /* set values */ 5067 ptr_vals = recv_buffer_vals; 5068 ptr_idxs = recv_buffer_idxs_local; 5069 for (i=0;i<n_recvs;i++) { 5070 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5071 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5072 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5073 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5074 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5075 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5076 } else { 5077 /* TODO */ 5078 } 5079 ptr_idxs += olengths_idxs[i]; 5080 ptr_vals += olengths_vals[i]; 5081 } 5082 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5083 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5084 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5085 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5086 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5087 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5088 5089 #if 0 5090 if (!restrict_comm) { /* check */ 5091 Vec lvec,rvec; 5092 PetscReal infty_error; 5093 5094 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5095 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5096 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5097 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5098 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5099 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5100 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5101 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5102 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5103 } 5104 #endif 5105 5106 /* assemble new additional is (if any) */ 5107 if (nis) { 5108 PetscInt **temp_idxs,*count_is,j,psum; 5109 5110 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5111 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5112 ptr_idxs = recv_buffer_idxs_is; 5113 psum = 0; 5114 for (i=0;i<n_recvs;i++) { 5115 for (j=0;j<nis;j++) { 5116 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5117 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5118 psum += plen; 5119 ptr_idxs += plen+1; /* shift pointer to received data */ 5120 } 5121 } 5122 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5123 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5124 for (i=1;i<nis;i++) { 5125 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5126 } 5127 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5128 ptr_idxs = recv_buffer_idxs_is; 5129 for (i=0;i<n_recvs;i++) { 5130 for (j=0;j<nis;j++) { 5131 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5132 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5133 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5134 ptr_idxs += plen+1; /* shift pointer to received data */ 5135 } 5136 } 5137 for (i=0;i<nis;i++) { 5138 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5139 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5140 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5141 } 5142 ierr = PetscFree(count_is);CHKERRQ(ierr); 5143 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5144 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5145 } 5146 /* free workspace */ 5147 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5148 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5149 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5150 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5151 if (isdense) { 5152 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5153 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5154 } else { 5155 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5156 } 5157 if (nis) { 5158 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5159 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5160 } 5161 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5162 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5163 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5164 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5165 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5166 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5167 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5168 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5169 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5170 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5171 ierr = PetscFree(onodes);CHKERRQ(ierr); 5172 if (nis) { 5173 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5174 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5175 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5176 } 5177 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5178 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5179 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5180 for (i=0;i<nis;i++) { 5181 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5182 } 5183 *mat_n = NULL; 5184 } 5185 PetscFunctionReturn(0); 5186 } 5187 5188 /* temporary hack into ksp private data structure */ 5189 #include <petsc/private/kspimpl.h> 5190 5191 #undef __FUNCT__ 5192 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5193 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5194 { 5195 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5196 PC_IS *pcis = (PC_IS*)pc->data; 5197 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5198 MatNullSpace CoarseNullSpace=NULL; 5199 ISLocalToGlobalMapping coarse_islg; 5200 IS coarse_is,*isarray; 5201 PetscInt i,im_active=-1,active_procs=-1; 5202 PetscInt nis,nisdofs,nisneu,nisvert; 5203 PC pc_temp; 5204 PCType coarse_pc_type; 5205 KSPType coarse_ksp_type; 5206 PetscBool multilevel_requested,multilevel_allowed; 5207 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5208 Mat t_coarse_mat_is; 5209 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5210 PetscMPIInt all_procs; 5211 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5212 PetscBool compute_vecs = PETSC_FALSE; 5213 PetscScalar *array; 5214 PetscErrorCode ierr; 5215 5216 PetscFunctionBegin; 5217 /* Assign global numbering to coarse dofs */ 5218 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5219 PetscInt ocoarse_size; 5220 compute_vecs = PETSC_TRUE; 5221 ocoarse_size = pcbddc->coarse_size; 5222 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5223 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5224 /* see if we can avoid some work */ 5225 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5226 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5227 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5228 PC pc; 5229 PetscBool isbddc; 5230 5231 /* temporary workaround since PCBDDC does not have a reset method so far */ 5232 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5233 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5234 if (isbddc) { 5235 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5236 } else { 5237 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5238 } 5239 coarse_reuse = PETSC_FALSE; 5240 } else { /* we can safely reuse already computed coarse matrix */ 5241 coarse_reuse = PETSC_TRUE; 5242 } 5243 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5244 coarse_reuse = PETSC_FALSE; 5245 } 5246 /* reset any subassembling information */ 5247 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5248 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5249 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5250 coarse_reuse = PETSC_TRUE; 5251 } 5252 5253 /* count "active" (i.e. with positive local size) and "void" processes */ 5254 im_active = !!(pcis->n); 5255 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5256 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5257 void_procs = all_procs-active_procs; 5258 csin_type_simple = PETSC_TRUE; 5259 redist = PETSC_FALSE; 5260 if (pcbddc->current_level && void_procs) { 5261 csin_ml = PETSC_TRUE; 5262 ncoarse_ml = void_procs; 5263 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5264 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5265 csin_ds = PETSC_TRUE; 5266 ncoarse_ds = pcbddc->redistribute_coarse; 5267 redist = PETSC_TRUE; 5268 } else { 5269 csin_ds = PETSC_TRUE; 5270 ncoarse_ds = active_procs; 5271 redist = PETSC_TRUE; 5272 } 5273 } else { 5274 csin_ml = PETSC_FALSE; 5275 ncoarse_ml = all_procs; 5276 if (void_procs) { 5277 csin_ds = PETSC_TRUE; 5278 ncoarse_ds = void_procs; 5279 csin_type_simple = PETSC_FALSE; 5280 } else { 5281 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5282 csin_ds = PETSC_TRUE; 5283 ncoarse_ds = pcbddc->redistribute_coarse; 5284 redist = PETSC_TRUE; 5285 } else { 5286 csin_ds = PETSC_FALSE; 5287 ncoarse_ds = all_procs; 5288 } 5289 } 5290 } 5291 5292 /* 5293 test if we can go multilevel: three conditions must be satisfied: 5294 - we have not exceeded the number of levels requested 5295 - we can actually subassemble the active processes 5296 - we can find a suitable number of MPI processes where we can place the subassembled problem 5297 */ 5298 multilevel_allowed = PETSC_FALSE; 5299 multilevel_requested = PETSC_FALSE; 5300 if (pcbddc->current_level < pcbddc->max_levels) { 5301 multilevel_requested = PETSC_TRUE; 5302 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5303 multilevel_allowed = PETSC_FALSE; 5304 } else { 5305 multilevel_allowed = PETSC_TRUE; 5306 } 5307 } 5308 /* determine number of process partecipating to coarse solver */ 5309 if (multilevel_allowed) { 5310 ncoarse = ncoarse_ml; 5311 csin = csin_ml; 5312 redist = PETSC_FALSE; 5313 } else { 5314 ncoarse = ncoarse_ds; 5315 csin = csin_ds; 5316 } 5317 5318 /* creates temporary l2gmap and IS for coarse indexes */ 5319 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5320 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5321 5322 /* creates temporary MATIS object for coarse matrix */ 5323 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5324 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5325 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5326 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5327 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5328 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5329 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5330 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5331 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5332 5333 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5334 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5335 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5336 const PetscInt *idxs; 5337 ISLocalToGlobalMapping tmap; 5338 5339 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5340 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5341 /* allocate space for temporary storage */ 5342 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5343 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5344 /* allocate for IS array */ 5345 nisdofs = pcbddc->n_ISForDofsLocal; 5346 nisneu = !!pcbddc->NeumannBoundariesLocal; 5347 nisvert = 0; 5348 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5349 nisvert = 1; 5350 } 5351 nis = nisdofs + nisneu + nisvert; 5352 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5353 /* dofs splitting */ 5354 for (i=0;i<nisdofs;i++) { 5355 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5356 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5357 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5358 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5359 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5360 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5361 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5362 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5363 } 5364 /* neumann boundaries */ 5365 if (pcbddc->NeumannBoundariesLocal) { 5366 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5367 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5368 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5369 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5370 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5371 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5373 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5374 } 5375 /* primal vertices (benign) */ 5376 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5377 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5378 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5379 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5380 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5381 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5382 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5383 } 5384 /* free memory */ 5385 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5386 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5387 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5388 } else { 5389 nis = 0; 5390 nisdofs = 0; 5391 nisneu = 0; 5392 nisvert = 0; 5393 isarray = NULL; 5394 } 5395 /* destroy no longer needed map */ 5396 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5397 5398 /* restrict on coarse candidates (if needed) */ 5399 coarse_mat_is = NULL; 5400 if (csin) { 5401 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5402 if (redist) { 5403 PetscMPIInt rank; 5404 PetscInt spc,n_spc_p1,dest[1],destsize; 5405 5406 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5407 spc = active_procs/ncoarse; 5408 n_spc_p1 = active_procs%ncoarse; 5409 if (im_active) { 5410 destsize = 1; 5411 if (rank > n_spc_p1*(spc+1)-1) { 5412 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5413 } else { 5414 dest[0] = rank/(spc+1); 5415 } 5416 } else { 5417 destsize = 0; 5418 } 5419 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5420 } else if (csin_type_simple) { 5421 PetscMPIInt rank; 5422 PetscInt issize,isidx; 5423 5424 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5425 if (im_active) { 5426 issize = 1; 5427 isidx = (PetscInt)rank; 5428 } else { 5429 issize = 0; 5430 isidx = -1; 5431 } 5432 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5433 } else { /* get a suitable subassembling pattern from MATIS code */ 5434 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5435 } 5436 5437 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5438 if (!redist || ncoarse <= void_procs) { 5439 PetscInt ncoarse_cand,tissize,*nisindices; 5440 PetscInt *coarse_candidates; 5441 const PetscInt* tisindices; 5442 5443 /* get coarse candidates' ranks in pc communicator */ 5444 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5445 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5446 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5447 if (!coarse_candidates[i]) { 5448 coarse_candidates[ncoarse_cand++]=i; 5449 } 5450 } 5451 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5452 5453 5454 if (pcbddc->dbg_flag) { 5455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5457 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5459 for (i=0;i<ncoarse_cand;i++) { 5460 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5461 } 5462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5464 } 5465 /* shift the pattern on coarse candidates */ 5466 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5467 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5468 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5469 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5470 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5471 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5472 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5473 } 5474 if (pcbddc->dbg_flag) { 5475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5477 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5479 } 5480 } 5481 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5482 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5483 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5484 } else { /* this is the last level, so use just receiving processes in subcomm */ 5485 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5486 } 5487 } else { 5488 if (pcbddc->dbg_flag) { 5489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5492 } 5493 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5494 coarse_mat_is = t_coarse_mat_is; 5495 } 5496 5497 /* create local to global scatters for coarse problem */ 5498 if (compute_vecs) { 5499 PetscInt lrows; 5500 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5501 if (coarse_mat_is) { 5502 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5503 } else { 5504 lrows = 0; 5505 } 5506 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5507 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5508 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5509 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5510 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5511 } 5512 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5513 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5514 5515 /* set defaults for coarse KSP and PC */ 5516 if (multilevel_allowed) { 5517 coarse_ksp_type = KSPRICHARDSON; 5518 coarse_pc_type = PCBDDC; 5519 } else { 5520 coarse_ksp_type = KSPPREONLY; 5521 coarse_pc_type = PCREDUNDANT; 5522 } 5523 5524 /* print some info if requested */ 5525 if (pcbddc->dbg_flag) { 5526 if (!multilevel_allowed) { 5527 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5528 if (multilevel_requested) { 5529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5530 } else if (pcbddc->max_levels) { 5531 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5532 } 5533 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5534 } 5535 } 5536 5537 /* create the coarse KSP object only once with defaults */ 5538 if (coarse_mat_is) { 5539 MatReuse coarse_mat_reuse; 5540 PetscViewer dbg_viewer = NULL; 5541 if (pcbddc->dbg_flag) { 5542 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5543 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5544 } 5545 if (!pcbddc->coarse_ksp) { 5546 char prefix[256],str_level[16]; 5547 size_t len; 5548 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5549 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5550 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5551 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5552 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5553 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5554 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5555 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5556 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5557 /* prefix */ 5558 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5559 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5560 if (!pcbddc->current_level) { 5561 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5562 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5563 } else { 5564 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5565 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5566 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5567 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5568 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5569 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5570 } 5571 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5572 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5573 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5574 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5575 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5576 /* allow user customization */ 5577 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5578 } 5579 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5580 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5581 if (nisdofs) { 5582 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5583 for (i=0;i<nisdofs;i++) { 5584 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5585 } 5586 } 5587 if (nisneu) { 5588 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5589 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5590 } 5591 if (nisvert) { 5592 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5593 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5594 } 5595 5596 /* get some info after set from options */ 5597 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5598 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5599 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5600 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5601 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5602 isbddc = PETSC_FALSE; 5603 } 5604 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5605 if (isredundant) { 5606 KSP inner_ksp; 5607 PC inner_pc; 5608 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5609 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5610 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5611 } 5612 5613 /* assemble coarse matrix */ 5614 if (coarse_reuse) { 5615 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5616 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5617 coarse_mat_reuse = MAT_REUSE_MATRIX; 5618 } else { 5619 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5620 } 5621 if (isbddc || isnn) { 5622 if (isbddc) { /* currently there's no API for this */ 5623 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5624 pcbddc->detect_disconnected = PETSC_TRUE; 5625 } 5626 if (pcbddc->coarsening_ratio > 1) { 5627 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5628 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5629 if (pcbddc->dbg_flag) { 5630 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5631 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5632 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5633 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5634 } 5635 } 5636 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5637 } else { 5638 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5639 coarse_mat = coarse_mat_is; 5640 } 5641 } else { 5642 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5643 } 5644 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5645 5646 /* propagate symmetry info of coarse matrix */ 5647 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5648 if (pc->pmat->symmetric_set) { 5649 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5650 } 5651 if (pc->pmat->hermitian_set) { 5652 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5653 } 5654 if (pc->pmat->spd_set) { 5655 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5656 } 5657 /* set operators */ 5658 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5659 if (pcbddc->dbg_flag) { 5660 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5661 } 5662 } else { /* processes non partecipating to coarse solver (if any) */ 5663 coarse_mat = 0; 5664 } 5665 ierr = PetscFree(isarray);CHKERRQ(ierr); 5666 #if 0 5667 { 5668 PetscViewer viewer; 5669 char filename[256]; 5670 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5671 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5672 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5673 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5674 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5675 } 5676 #endif 5677 5678 /* Compute coarse null space (special handling by BDDC only) */ 5679 #if 0 5680 if (pcbddc->NullSpace) { 5681 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5682 } 5683 #endif 5684 /* hack */ 5685 if (pcbddc->coarse_ksp) { 5686 Vec crhs,csol; 5687 5688 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5689 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5690 if (!csol) { 5691 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5692 } 5693 if (!crhs) { 5694 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5695 } 5696 } 5697 5698 /* compute null space for coarse solver if the benign trick has been requested */ 5699 if (pcbddc->benign_null) { 5700 5701 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5702 for (i=0;i<pcbddc->benign_n;i++) { 5703 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5704 } 5705 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5706 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5707 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5708 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5709 if (coarse_mat) { 5710 Vec nullv; 5711 PetscScalar *array,*array2; 5712 PetscInt nl; 5713 5714 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5715 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5716 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5717 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5718 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5719 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5720 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5721 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5722 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5723 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5724 } 5725 } 5726 5727 if (pcbddc->coarse_ksp) { 5728 PetscBool ispreonly; 5729 5730 if (CoarseNullSpace) { 5731 PetscBool isnull; 5732 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5733 if (0) { 5734 if (isbddc && !pcbddc->benign_saddle_point) { 5735 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5736 } else { 5737 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5738 } 5739 } else { 5740 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5741 } 5742 } 5743 /* setup coarse ksp */ 5744 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5745 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5746 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5747 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5748 KSP check_ksp; 5749 KSPType check_ksp_type; 5750 PC check_pc; 5751 Vec check_vec,coarse_vec; 5752 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5753 PetscInt its; 5754 PetscBool compute_eigs; 5755 PetscReal *eigs_r,*eigs_c; 5756 PetscInt neigs; 5757 const char *prefix; 5758 5759 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5760 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5761 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5762 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5763 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5764 if (ispreonly) { 5765 check_ksp_type = KSPPREONLY; 5766 compute_eigs = PETSC_FALSE; 5767 } else { 5768 check_ksp_type = KSPGMRES; 5769 compute_eigs = PETSC_TRUE; 5770 } 5771 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5772 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5773 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5774 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5775 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5776 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5777 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5778 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5779 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5780 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5781 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5782 /* create random vec */ 5783 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5784 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5785 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5786 if (CoarseNullSpace) { 5787 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5788 } 5789 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5790 /* solve coarse problem */ 5791 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5792 if (CoarseNullSpace) { 5793 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5794 } 5795 /* set eigenvalue estimation if preonly has not been requested */ 5796 if (compute_eigs) { 5797 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5798 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5799 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5800 lambda_max = eigs_r[neigs-1]; 5801 lambda_min = eigs_r[0]; 5802 if (pcbddc->use_coarse_estimates) { 5803 if (lambda_max>lambda_min) { 5804 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5805 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5806 } 5807 } 5808 } 5809 5810 /* check coarse problem residual error */ 5811 if (pcbddc->dbg_flag) { 5812 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5813 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5814 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5815 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5816 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5817 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5818 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5819 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5820 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5821 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5822 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5823 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5824 if (CoarseNullSpace) { 5825 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5826 } 5827 if (compute_eigs) { 5828 PetscReal lambda_max_s,lambda_min_s; 5829 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5830 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5831 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5833 for (i=0;i<neigs;i++) { 5834 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5835 } 5836 } 5837 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5838 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5839 } 5840 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5841 if (compute_eigs) { 5842 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5843 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5844 } 5845 } 5846 } 5847 /* print additional info */ 5848 if (pcbddc->dbg_flag) { 5849 /* waits until all processes reaches this point */ 5850 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5853 } 5854 5855 /* free memory */ 5856 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5857 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5858 PetscFunctionReturn(0); 5859 } 5860 5861 #undef __FUNCT__ 5862 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5863 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5864 { 5865 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5866 PC_IS* pcis = (PC_IS*)pc->data; 5867 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5868 IS subset,subset_mult,subset_n; 5869 PetscInt local_size,coarse_size=0; 5870 PetscInt *local_primal_indices=NULL; 5871 const PetscInt *t_local_primal_indices; 5872 PetscErrorCode ierr; 5873 5874 PetscFunctionBegin; 5875 /* Compute global number of coarse dofs */ 5876 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5877 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5878 } 5879 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5880 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5881 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5883 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5884 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5885 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5886 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5887 if (local_size != pcbddc->local_primal_size) { 5888 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5889 } 5890 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5891 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5892 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5893 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5894 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5895 5896 /* check numbering */ 5897 if (pcbddc->dbg_flag) { 5898 PetscScalar coarsesum,*array,*array2; 5899 PetscInt i; 5900 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5901 5902 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5903 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5905 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5906 /* counter */ 5907 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5908 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5909 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5910 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5911 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5912 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5913 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5914 for (i=0;i<pcbddc->local_primal_size;i++) { 5915 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5916 } 5917 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5918 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5919 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5920 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5921 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5924 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5925 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5926 for (i=0;i<pcis->n;i++) { 5927 if (array[i] != 0.0 && array[i] != array2[i]) { 5928 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5929 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5930 set_error = PETSC_TRUE; 5931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5932 } 5933 } 5934 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5935 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5936 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5937 for (i=0;i<pcis->n;i++) { 5938 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5939 } 5940 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5941 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5942 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5943 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5944 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5946 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5947 PetscInt *gidxs; 5948 5949 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5950 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5954 for (i=0;i<pcbddc->local_primal_size;i++) { 5955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5956 } 5957 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5958 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5959 } 5960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5961 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5962 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5963 } 5964 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5965 /* get back data */ 5966 *coarse_size_n = coarse_size; 5967 *local_primal_indices_n = local_primal_indices; 5968 PetscFunctionReturn(0); 5969 } 5970 5971 #undef __FUNCT__ 5972 #define __FUNCT__ "PCBDDCGlobalToLocal" 5973 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5974 { 5975 IS localis_t; 5976 PetscInt i,lsize,*idxs,n; 5977 PetscScalar *vals; 5978 PetscErrorCode ierr; 5979 5980 PetscFunctionBegin; 5981 /* get indices in local ordering exploiting local to global map */ 5982 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5983 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5984 for (i=0;i<lsize;i++) vals[i] = 1.0; 5985 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5986 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5987 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5988 if (idxs) { /* multilevel guard */ 5989 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5990 } 5991 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5992 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5993 ierr = PetscFree(vals);CHKERRQ(ierr); 5994 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5995 /* now compute set in local ordering */ 5996 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5997 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5998 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5999 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6000 for (i=0,lsize=0;i<n;i++) { 6001 if (PetscRealPart(vals[i]) > 0.5) { 6002 lsize++; 6003 } 6004 } 6005 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6006 for (i=0,lsize=0;i<n;i++) { 6007 if (PetscRealPart(vals[i]) > 0.5) { 6008 idxs[lsize++] = i; 6009 } 6010 } 6011 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6012 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6013 *localis = localis_t; 6014 PetscFunctionReturn(0); 6015 } 6016 6017 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6018 #undef __FUNCT__ 6019 #define __FUNCT__ "PCBDDCMatMult_Private" 6020 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6021 { 6022 PCBDDCChange_ctx change_ctx; 6023 PetscErrorCode ierr; 6024 6025 PetscFunctionBegin; 6026 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6027 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6028 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6029 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6030 PetscFunctionReturn(0); 6031 } 6032 6033 #undef __FUNCT__ 6034 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6035 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6036 { 6037 PCBDDCChange_ctx change_ctx; 6038 PetscErrorCode ierr; 6039 6040 PetscFunctionBegin; 6041 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6042 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6043 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6044 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6045 PetscFunctionReturn(0); 6046 } 6047 6048 #undef __FUNCT__ 6049 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6050 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6051 { 6052 PC_IS *pcis=(PC_IS*)pc->data; 6053 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6054 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6055 Mat S_j; 6056 PetscInt *used_xadj,*used_adjncy; 6057 PetscBool free_used_adj; 6058 PetscErrorCode ierr; 6059 6060 PetscFunctionBegin; 6061 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6062 free_used_adj = PETSC_FALSE; 6063 if (pcbddc->sub_schurs_layers == -1) { 6064 used_xadj = NULL; 6065 used_adjncy = NULL; 6066 } else { 6067 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6068 used_xadj = pcbddc->mat_graph->xadj; 6069 used_adjncy = pcbddc->mat_graph->adjncy; 6070 } else if (pcbddc->computed_rowadj) { 6071 used_xadj = pcbddc->mat_graph->xadj; 6072 used_adjncy = pcbddc->mat_graph->adjncy; 6073 } else { 6074 PetscBool flg_row=PETSC_FALSE; 6075 const PetscInt *xadj,*adjncy; 6076 PetscInt nvtxs; 6077 6078 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6079 if (flg_row) { 6080 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6081 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6082 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6083 free_used_adj = PETSC_TRUE; 6084 } else { 6085 pcbddc->sub_schurs_layers = -1; 6086 used_xadj = NULL; 6087 used_adjncy = NULL; 6088 } 6089 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6090 } 6091 } 6092 6093 /* setup sub_schurs data */ 6094 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6095 if (!sub_schurs->schur_explicit) { 6096 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6097 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6098 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6099 } else { 6100 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6101 PetscBool isseqaij; 6102 PetscInt benign_n; 6103 Mat change = NULL; 6104 IS change_primal = NULL; 6105 6106 if (!pcbddc->use_vertices && reuse_solvers) { 6107 PetscInt n_vertices; 6108 6109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6110 reuse_solvers = (PetscBool)!n_vertices; 6111 } 6112 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6113 if (!isseqaij) { 6114 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6115 if (matis->A == pcbddc->local_mat) { 6116 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6117 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6118 } else { 6119 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6120 } 6121 } 6122 if (!pcbddc->benign_change_explicit) { 6123 benign_n = pcbddc->benign_n; 6124 } else { 6125 benign_n = 0; 6126 } 6127 /* If the user defines additional constraints, we import them here. 6128 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6129 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6130 PC_IS *pcisf; 6131 PC_BDDC *pcbddcf; 6132 PC pcf; 6133 6134 if (pcbddc->sub_schurs_rebuild) { 6135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6136 } 6137 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6138 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6139 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6140 /* hacks */ 6141 pcisf = (PC_IS*)pcf->data; 6142 pcisf->is_B_local = pcis->is_B_local; 6143 pcisf->vec1_N = pcis->vec1_N; 6144 pcisf->BtoNmap = pcis->BtoNmap; 6145 pcisf->n = pcis->n; 6146 pcisf->n_B = pcis->n_B; 6147 pcbddcf = (PC_BDDC*)pcf->data; 6148 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6149 pcbddcf->mat_graph = pcbddc->mat_graph; 6150 pcbddcf->use_faces = PETSC_TRUE; 6151 pcbddcf->use_change_of_basis = PETSC_TRUE; 6152 pcbddcf->use_change_on_faces = PETSC_TRUE; 6153 pcbddcf->use_qr_single = PETSC_TRUE; 6154 pcbddcf->fake_change = PETSC_TRUE; 6155 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6156 /* store information on primal vertices and change of basis (in local numbering) */ 6157 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6158 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6159 change = pcbddcf->ConstraintMatrix; 6160 pcbddcf->ConstraintMatrix = NULL; 6161 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6162 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6163 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6164 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6165 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6166 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6167 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6168 pcf->ops->destroy = NULL; 6169 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6170 } 6171 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6172 ierr = MatDestroy(&change);CHKERRQ(ierr); 6173 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6174 } 6175 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6176 6177 /* free adjacency */ 6178 if (free_used_adj) { 6179 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6180 } 6181 PetscFunctionReturn(0); 6182 } 6183 6184 #undef __FUNCT__ 6185 #define __FUNCT__ "PCBDDCInitSubSchurs" 6186 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6187 { 6188 PC_IS *pcis=(PC_IS*)pc->data; 6189 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6190 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6191 PCBDDCGraph graph; 6192 PetscErrorCode ierr; 6193 6194 PetscFunctionBegin; 6195 /* attach interface graph for determining subsets */ 6196 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6197 IS verticesIS,verticescomm; 6198 PetscInt vsize,*idxs; 6199 6200 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6201 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6202 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6203 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6204 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6205 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6206 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6207 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6208 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6209 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6210 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6211 /* 6212 if (pcbddc->dbg_flag) { 6213 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6214 } 6215 */ 6216 } else { 6217 graph = pcbddc->mat_graph; 6218 } 6219 6220 /* sub_schurs init */ 6221 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6222 6223 /* free graph struct */ 6224 if (pcbddc->sub_schurs_rebuild) { 6225 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6226 } 6227 PetscFunctionReturn(0); 6228 } 6229 6230 #undef __FUNCT__ 6231 #define __FUNCT__ "PCBDDCCheckOperator" 6232 PetscErrorCode PCBDDCCheckOperator(PC pc) 6233 { 6234 PC_IS *pcis=(PC_IS*)pc->data; 6235 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6236 PetscErrorCode ierr; 6237 6238 PetscFunctionBegin; 6239 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6240 IS zerodiag = NULL; 6241 Mat S_j,B0_B=NULL; 6242 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6243 PetscScalar *p0_check,*array,*array2; 6244 PetscReal norm; 6245 PetscInt i; 6246 6247 /* B0 and B0_B */ 6248 if (zerodiag) { 6249 IS dummy; 6250 6251 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6252 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6253 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6254 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6255 } 6256 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6257 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6258 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6259 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6261 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6262 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6263 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6264 /* S_j */ 6265 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6266 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6267 6268 /* mimic vector in \widetilde{W}_\Gamma */ 6269 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6270 /* continuous in primal space */ 6271 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6272 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6273 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6274 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6275 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6276 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6277 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6278 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6279 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6280 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6281 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6282 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6283 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6284 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6285 6286 /* assemble rhs for coarse problem */ 6287 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6288 /* local with Schur */ 6289 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6290 if (zerodiag) { 6291 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6292 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6293 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6294 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6295 } 6296 /* sum on primal nodes the local contributions */ 6297 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6298 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6299 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6300 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6301 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6302 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6304 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6305 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6306 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6307 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6308 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6309 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6310 /* scale primal nodes (BDDC sums contibutions) */ 6311 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6312 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6313 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6314 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6315 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6316 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6317 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6318 /* global: \widetilde{B0}_B w_\Gamma */ 6319 if (zerodiag) { 6320 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6321 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6322 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6323 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6324 } 6325 /* BDDC */ 6326 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6327 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6328 6329 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6330 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6331 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6332 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6333 for (i=0;i<pcbddc->benign_n;i++) { 6334 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6335 } 6336 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6337 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6338 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6339 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6340 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6341 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6342 } 6343 PetscFunctionReturn(0); 6344 } 6345