1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 /* no-net-flux */ 650 if (pcbddc->benign_compute_nonetflux) { 651 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 652 MatNullSpace near_null_space; 653 ISLocalToGlobalMapping rmap; 654 Vec quad_vec; 655 PetscScalar *pvals; 656 PetscInt i,np,*dummyins; 657 658 /* create vector to hold quadrature weights */ 659 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 660 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 661 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 662 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 663 664 /* compute B^{(i)T} * 1_p */ 665 np = 0; 666 if (zerodiag) { 667 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 668 } else if (pressures) { 669 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 670 } 671 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 672 for (i=0;i<np;i++) pvals[i] = 1.; 673 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 674 if (np) { 675 const PetscInt *pidxs; 676 677 if (zerodiag) { 678 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 679 } else if (pressures) { 680 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 681 } 682 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 683 if (zerodiag) { 684 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 685 } else if (pressures) { 686 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 687 } 688 } 689 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 690 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 691 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 692 ierr = PetscFree(pvals);CHKERRQ(ierr); 693 694 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 695 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 696 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 697 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 698 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 699 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 700 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 701 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 702 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 703 for (i=0;i<matis->sf_nleaves;i++) { 704 if (PetscGlobalRank != matis->sf_leafdata[i]) { 705 pvals[i] = 0.; 706 } 707 } 708 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 709 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 710 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 711 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 712 ierr = PetscFree(dummyins);CHKERRQ(ierr); 713 714 /* assembly quadrature vec and attach near null space to pmat */ 715 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 716 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 717 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 718 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 719 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 720 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 721 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 722 } 723 724 /* change of basis and p0 dofs */ 725 if (has_null_pressures) { 726 IS zerodiagc; 727 const PetscInt *idxs,*idxsc; 728 PetscInt i,s,*nnz; 729 730 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 731 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 732 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 733 /* local change of basis for pressures */ 734 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 735 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 736 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 737 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 738 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 739 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 740 for (i=0;i<pcbddc->benign_n;i++) { 741 PetscInt nzs,j; 742 743 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 744 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 745 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 746 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 747 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 748 } 749 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 750 ierr = PetscFree(nnz);CHKERRQ(ierr); 751 /* set identity on velocities */ 752 for (i=0;i<n-nz;i++) { 753 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 754 } 755 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 756 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 757 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 758 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 759 /* set change on pressures */ 760 for (s=0;s<pcbddc->benign_n;s++) { 761 PetscScalar *array; 762 PetscInt nzs; 763 764 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 765 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 766 for (i=0;i<nzs-1;i++) { 767 PetscScalar vals[2]; 768 PetscInt cols[2]; 769 770 cols[0] = idxs[i]; 771 cols[1] = idxs[nzs-1]; 772 vals[0] = 1.; 773 vals[1] = 1.; 774 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 775 } 776 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 777 for (i=0;i<nzs-1;i++) array[i] = -1.; 778 array[nzs-1] = 1.; 779 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 780 /* store local idxs for p0 */ 781 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 782 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 783 ierr = PetscFree(array);CHKERRQ(ierr); 784 } 785 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 /* project if needed */ 788 if (pcbddc->benign_change_explicit) { 789 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 791 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 793 } 794 /* store global idxs for p0 */ 795 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 796 } 797 pcbddc->benign_zerodiag_subs = zerodiag_subs; 798 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 799 800 /* determines if the coarse solver will be singular or not */ 801 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 802 *zerodiaglocal = zerodiag; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 808 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 809 { 810 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 if (!pcbddc->benign_sf) { 815 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 816 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 817 } 818 if (get) { /* use SF to get values */ 819 PetscScalar *array; 820 821 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 822 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 823 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 824 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 825 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 826 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 828 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 835 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 836 { 837 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* TODO: add error checking 842 - avoid nested pop (or push) calls. 843 - cannot push before pop. 844 - cannot call this if pcbddc->local_mat is NULL 845 */ 846 if (!pcbddc->benign_n) { 847 PetscFunctionReturn(0); 848 } 849 if (pop) { 850 if (pcbddc->benign_change_explicit) { 851 IS is_p0; 852 MatReuse reuse; 853 854 /* extract B_0 */ 855 reuse = MAT_INITIAL_MATRIX; 856 if (pcbddc->benign_B0) { 857 reuse = MAT_REUSE_MATRIX; 858 } 859 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 861 /* remove rows and cols from local problem */ 862 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 863 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 864 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 866 } else { 867 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 868 PetscScalar *vals; 869 PetscInt i,n,*idxs_ins; 870 871 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 872 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 873 if (!pcbddc->benign_B0) { 874 PetscInt *nnz; 875 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 876 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 877 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 878 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 879 for (i=0;i<pcbddc->benign_n;i++) { 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 881 nnz[i] = n - nnz[i]; 882 } 883 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 884 ierr = PetscFree(nnz);CHKERRQ(ierr); 885 } 886 887 for (i=0;i<pcbddc->benign_n;i++) { 888 PetscScalar *array; 889 PetscInt *idxs,j,nz,cum; 890 891 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 892 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 893 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 for (j=0;j<nz;j++) vals[j] = 1.; 895 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 896 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 897 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 898 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 899 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 900 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 901 cum = 0; 902 for (j=0;j<n;j++) { 903 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 904 vals[cum] = array[j]; 905 idxs_ins[cum] = j; 906 cum++; 907 } 908 } 909 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 916 } 917 } else { /* push */ 918 if (pcbddc->benign_change_explicit) { 919 PetscInt i; 920 921 for (i=0;i<pcbddc->benign_n;i++) { 922 PetscScalar *B0_vals; 923 PetscInt *B0_cols,B0_ncol; 924 925 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 926 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 927 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 928 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 929 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 930 } 931 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 } else { 934 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "PCBDDCAdaptiveSelection" 942 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 943 { 944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 945 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 946 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 947 PetscBLASInt *B_iwork,*B_ifail; 948 PetscScalar *work,lwork; 949 PetscScalar *St,*S,*eigv; 950 PetscScalar *Sarray,*Starray; 951 PetscReal *eigs,thresh; 952 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 953 PetscBool allocated_S_St; 954 #if defined(PETSC_USE_COMPLEX) 955 PetscReal *rwork; 956 #endif 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (!sub_schurs->schur_explicit) { 961 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 962 } 963 964 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 965 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 966 } 967 968 if (pcbddc->dbg_flag) { 969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 971 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 972 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 } 974 975 if (pcbddc->dbg_flag) { 976 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 977 } 978 979 /* max size of subsets */ 980 mss = 0; 981 for (i=0;i<sub_schurs->n_subs;i++) { 982 PetscInt subset_size; 983 984 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 985 mss = PetscMax(mss,subset_size); 986 } 987 988 /* min/max and threshold */ 989 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 990 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 991 nmax = PetscMax(nmin,nmax); 992 allocated_S_St = PETSC_FALSE; 993 if (nmin) { 994 allocated_S_St = PETSC_TRUE; 995 } 996 997 /* allocate lapack workspace */ 998 cum = cum2 = 0; 999 maxneigs = 0; 1000 for (i=0;i<sub_schurs->n_subs;i++) { 1001 PetscInt n,subset_size; 1002 1003 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1004 n = PetscMin(subset_size,nmax); 1005 cum += subset_size; 1006 cum2 += subset_size*n; 1007 maxneigs = PetscMax(maxneigs,n); 1008 } 1009 if (mss) { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_N = mss; 1013 PetscReal zero = 0.0; 1014 PetscReal eps = 0.0; /* dlamch? */ 1015 1016 B_lwork = -1; 1017 S = NULL; 1018 St = NULL; 1019 eigs = NULL; 1020 eigv = NULL; 1021 B_iwork = NULL; 1022 B_ifail = NULL; 1023 #if defined(PETSC_USE_COMPLEX) 1024 rwork = NULL; 1025 #endif 1026 thresh = 1.0; 1027 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1030 #else 1031 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1032 #endif 1033 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1035 } else { 1036 /* TODO */ 1037 } 1038 } else { 1039 lwork = 0; 1040 } 1041 1042 nv = 0; 1043 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1044 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1045 } 1046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1047 if (allocated_S_St) { 1048 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1049 } 1050 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1051 #if defined(PETSC_USE_COMPLEX) 1052 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1055 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1056 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1057 nv+cum,&pcbddc->adaptive_constraints_idxs, 1058 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1059 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1060 1061 maxneigs = 0; 1062 cum = cumarray = 0; 1063 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1064 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1065 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1066 const PetscInt *idxs; 1067 1068 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1069 for (cum=0;cum<nv;cum++) { 1070 pcbddc->adaptive_constraints_n[cum] = 1; 1071 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1072 pcbddc->adaptive_constraints_data[cum] = 1.0; 1073 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1074 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1075 } 1076 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1077 } 1078 1079 if (mss) { /* multilevel */ 1080 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1081 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1082 } 1083 1084 thresh = pcbddc->adaptive_threshold; 1085 for (i=0;i<sub_schurs->n_subs;i++) { 1086 const PetscInt *idxs; 1087 PetscReal infty = PETSC_MAX_REAL; 1088 PetscInt j,subset_size,eigs_start = 0; 1089 PetscBLASInt B_N; 1090 PetscBool same_data = PETSC_FALSE; 1091 1092 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1093 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1095 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1096 if (sub_schurs->is_hermitian) { 1097 PetscInt j,k; 1098 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1099 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 for (j=0;j<subset_size;j++) { 1103 for (k=j;k<subset_size;k++) { 1104 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1105 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1106 } 1107 } 1108 } else { 1109 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1110 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 S = Sarray + cumarray; 1114 St = Starray + cumarray; 1115 } 1116 /* see if we can save some work */ 1117 if (sub_schurs->n_subs == 1) { 1118 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1119 } 1120 1121 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1122 B_neigs = 0; 1123 } else { 1124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1125 PetscBLASInt B_itype = 1; 1126 PetscBLASInt B_IL, B_IU; 1127 PetscReal eps = -1.0; /* dlamch? */ 1128 PetscInt nmin_s; 1129 1130 if (pcbddc->dbg_flag) { 1131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1132 } 1133 1134 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1135 if (thresh > 1.+PETSC_SMALL) { 1136 1137 /* ask for eigenvalues larger than thresh */ 1138 #if defined(PETSC_USE_COMPLEX) 1139 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1140 #else 1141 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1142 #endif 1143 } else { 1144 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1145 B_IL = 1; 1146 #if defined(PETSC_USE_COMPLEX) 1147 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1148 #else 1149 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1150 #endif 1151 } 1152 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1153 if (B_ierr) { 1154 if (B_ierr < 0 ) { 1155 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1156 } else if (B_ierr <= B_N) { 1157 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1158 } else { 1159 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1160 } 1161 } 1162 1163 if (B_neigs > nmax) { 1164 if (pcbddc->dbg_flag) { 1165 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1166 } 1167 eigs_start = B_neigs -nmax; 1168 B_neigs = nmax; 1169 } 1170 1171 nmin_s = PetscMin(nmin,B_N); 1172 if (B_neigs < nmin_s) { 1173 PetscBLASInt B_neigs2; 1174 1175 B_IU = B_N - B_neigs; 1176 B_IL = B_N - nmin_s + 1; 1177 if (pcbddc->dbg_flag) { 1178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1179 } 1180 if (sub_schurs->is_hermitian) { 1181 PetscInt j; 1182 for (j=0;j<subset_size;j++) { 1183 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1184 } 1185 for (j=0;j<subset_size;j++) { 1186 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1187 } 1188 } else { 1189 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1190 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1193 #if defined(PETSC_USE_COMPLEX) 1194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1195 #else 1196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1197 #endif 1198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1199 B_neigs += B_neigs2; 1200 } 1201 if (B_ierr) { 1202 if (B_ierr < 0 ) { 1203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1204 } else if (B_ierr <= B_N) { 1205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1206 } else { 1207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1208 } 1209 } 1210 if (pcbddc->dbg_flag) { 1211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1212 for (j=0;j<B_neigs;j++) { 1213 if (eigs[j] == 0.0) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1217 } 1218 } 1219 } 1220 } else { 1221 /* TODO */ 1222 } 1223 } 1224 /* change the basis back to the original one */ 1225 if (sub_schurs->change) { 1226 Mat change,phi,phit; 1227 1228 if (pcbddc->dbg_flag > 1) { 1229 PetscInt ii; 1230 for (ii=0;ii<B_neigs;ii++) { 1231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1232 for (j=0;j<B_N;j++) { 1233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1234 } 1235 } 1236 } 1237 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1239 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1240 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1241 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1242 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1243 } 1244 maxneigs = PetscMax(B_neigs,maxneigs); 1245 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1246 if (B_neigs) { 1247 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1248 1249 if (pcbddc->dbg_flag > 1) { 1250 PetscInt ii; 1251 for (ii=0;ii<B_neigs;ii++) { 1252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1253 for (j=0;j<B_N;j++) { 1254 #if defined(PETSC_USE_COMPLEX) 1255 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1256 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1258 #else 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1260 #endif 1261 } 1262 } 1263 } 1264 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1265 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1266 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1267 cum++; 1268 } 1269 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1270 /* shift for next computation */ 1271 cumarray += subset_size*subset_size; 1272 } 1273 if (pcbddc->dbg_flag) { 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 } 1276 1277 if (mss) { 1278 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1279 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1280 /* destroy matrices (junk) */ 1281 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1282 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1283 } 1284 if (allocated_S_St) { 1285 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1288 #if defined(PETSC_USE_COMPLEX) 1289 ierr = PetscFree(rwork);CHKERRQ(ierr); 1290 #endif 1291 if (pcbddc->dbg_flag) { 1292 PetscInt maxneigs_r; 1293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "PCBDDCSetUpSolvers" 1301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1302 { 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PetscScalar *coarse_submat_vals; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1311 1312 /* Setup local neumann solver ksp_R */ 1313 /* PCBDDCSetUpLocalScatters should be called first! */ 1314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1315 1316 /* Change global null space passed in by the user if change of basis has been requested */ 1317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1319 } 1320 1321 /* 1322 Setup local correction and local part of coarse basis. 1323 Gives back the dense local part of the coarse matrix in column major ordering 1324 */ 1325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1326 1327 /* Compute total number of coarse nodes and setup coarse solver */ 1328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1329 1330 /* free */ 1331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "PCBDDCResetCustomization" 1337 PetscErrorCode PCBDDCResetCustomization(PC pc) 1338 { 1339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1346 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1348 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1349 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1350 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1351 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1353 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1354 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCResetTopography" 1360 PetscErrorCode PCBDDCResetTopography(PC pc) 1361 { 1362 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1363 PetscInt i; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1368 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1369 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1370 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1371 for (i=0;i<pcbddc->n_local_subs;i++) { 1372 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1375 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "PCBDDCResetSolvers" 1381 PetscErrorCode PCBDDCResetSolvers(PC pc) 1382 { 1383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1388 if (pcbddc->coarse_phi_B) { 1389 PetscScalar *array; 1390 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1391 ierr = PetscFree(array);CHKERRQ(ierr); 1392 } 1393 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1394 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1395 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1396 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1397 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1398 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1402 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1403 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1404 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1405 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1406 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1407 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1408 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1411 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1412 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1413 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1416 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1417 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1418 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1420 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1421 if (pcbddc->benign_zerodiag_subs) { 1422 PetscInt i; 1423 for (i=0;i<pcbddc->benign_n;i++) { 1424 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1434 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1435 { 1436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1437 PC_IS *pcis = (PC_IS*)pc->data; 1438 VecType impVecType; 1439 PetscInt n_constraints,n_R,old_size; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 if (!pcbddc->ConstraintMatrix) { 1444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1445 } 1446 /* get sizes */ 1447 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1448 n_R = pcis->n - pcbddc->n_vertices; 1449 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1450 /* local work vectors (try to avoid unneeded work)*/ 1451 /* R nodes */ 1452 old_size = -1; 1453 if (pcbddc->vec1_R) { 1454 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1455 } 1456 if (n_R != old_size) { 1457 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1458 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1459 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1460 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1461 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1462 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1463 } 1464 /* local primal dofs */ 1465 old_size = -1; 1466 if (pcbddc->vec1_P) { 1467 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1468 } 1469 if (pcbddc->local_primal_size != old_size) { 1470 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1471 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1472 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1474 } 1475 /* local explicit constraints */ 1476 old_size = -1; 1477 if (pcbddc->vec1_C) { 1478 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1479 } 1480 if (n_constraints && n_constraints != old_size) { 1481 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1483 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1484 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "PCBDDCSetUpCorrection" 1491 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1492 { 1493 PetscErrorCode ierr; 1494 /* pointers to pcis and pcbddc */ 1495 PC_IS* pcis = (PC_IS*)pc->data; 1496 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1497 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1498 /* submatrices of local problem */ 1499 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1500 /* submatrices of local coarse problem */ 1501 Mat S_VV,S_CV,S_VC,S_CC; 1502 /* working matrices */ 1503 Mat C_CR; 1504 /* additional working stuff */ 1505 PC pc_R; 1506 Mat F; 1507 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1508 1509 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1510 PetscScalar *work; 1511 PetscInt *idx_V_B; 1512 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1513 PetscInt i,n_R,n_D,n_B; 1514 1515 /* some shortcuts to scalars */ 1516 PetscScalar one=1.0,m_one=-1.0; 1517 1518 PetscFunctionBegin; 1519 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1520 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1521 } 1522 1523 /* Set Non-overlapping dimensions */ 1524 n_vertices = pcbddc->n_vertices; 1525 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1526 n_B = pcis->n_B; 1527 n_D = pcis->n - n_B; 1528 n_R = pcis->n - n_vertices; 1529 1530 /* vertices in boundary numbering */ 1531 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1532 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1533 if (i != n_vertices) { 1534 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1535 } 1536 1537 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1538 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1540 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 1548 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1549 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1553 lda_rhs = n_R; 1554 need_benign_correction = PETSC_FALSE; 1555 if (isLU || isILU || isCHOL) { 1556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1557 } else if (sub_schurs->reuse_solver) { 1558 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1559 MatFactorType type; 1560 1561 F = reuse_solver->F; 1562 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1563 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1564 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1565 need_benign_correction = !!reuse_solver->benign_n; 1566 } else { 1567 F = NULL; 1568 } 1569 1570 /* allocate workspace */ 1571 n = 0; 1572 if (n_constraints) { 1573 n += lda_rhs*n_constraints; 1574 } 1575 if (n_vertices) { 1576 n = PetscMax(2*lda_rhs*n_vertices,n); 1577 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1578 } 1579 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1580 1581 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1582 if (n_constraints) { 1583 Mat M1,M2,M3,C_B; 1584 IS is_aux; 1585 PetscScalar *array,*array2; 1586 1587 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1588 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1589 1590 /* Extract constraints on R nodes: C_{CR} */ 1591 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1592 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1593 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1594 1595 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1596 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1597 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1598 for (i=0;i<n_constraints;i++) { 1599 const PetscScalar *row_cmat_values; 1600 const PetscInt *row_cmat_indices; 1601 PetscInt size_of_constraint,j; 1602 1603 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1604 for (j=0;j<size_of_constraint;j++) { 1605 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1606 } 1607 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1608 } 1609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1610 if (F) { 1611 Mat B; 1612 1613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1614 if (need_benign_correction) { 1615 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1616 1617 /* rhs is already zero on interior dofs, no need to change the rhs */ 1618 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1619 } 1620 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1621 if (need_benign_correction) { 1622 PetscScalar *marr; 1623 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1624 1625 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1626 for (i=0;i<n_constraints;i++) { 1627 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1628 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1630 } 1631 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1632 } 1633 ierr = MatDestroy(&B);CHKERRQ(ierr); 1634 } else { 1635 PetscScalar *marr; 1636 1637 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1638 for (i=0;i<n_constraints;i++) { 1639 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1640 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1641 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1644 } 1645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1646 } 1647 if (!pcbddc->switch_static) { 1648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1649 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1650 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1651 for (i=0;i<n_constraints;i++) { 1652 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1653 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1654 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1658 } 1659 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1660 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1661 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1662 } else { 1663 if (lda_rhs != n_R) { 1664 IS dummy; 1665 1666 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1667 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1668 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1669 } else { 1670 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1671 pcbddc->local_auxmat2 = local_auxmat2_R; 1672 } 1673 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1674 } 1675 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1676 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1677 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1678 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1679 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1680 if (isCHOL) { 1681 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1682 } else { 1683 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1684 } 1685 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1686 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1687 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1688 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1689 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1690 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1691 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1692 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1693 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1694 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1695 } 1696 1697 /* Get submatrices from subdomain matrix */ 1698 if (n_vertices) { 1699 IS is_aux; 1700 1701 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1702 IS tis; 1703 1704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1705 ierr = ISSort(tis);CHKERRQ(ierr); 1706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1708 } else { 1709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1710 } 1711 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1712 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1713 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1714 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1715 } 1716 1717 /* Matrix of coarse basis functions (local) */ 1718 if (pcbddc->coarse_phi_B) { 1719 PetscInt on_B,on_primal,on_D=n_D; 1720 if (pcbddc->coarse_phi_D) { 1721 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1722 } 1723 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1724 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1725 PetscScalar *marray; 1726 1727 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1728 ierr = PetscFree(marray);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1731 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1732 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1733 } 1734 } 1735 1736 if (!pcbddc->coarse_phi_B) { 1737 PetscScalar *marray; 1738 1739 n = n_B*pcbddc->local_primal_size; 1740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1741 n += n_D*pcbddc->local_primal_size; 1742 } 1743 if (!pcbddc->symmetric_primal) { 1744 n *= 2; 1745 } 1746 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1748 n = n_B*pcbddc->local_primal_size; 1749 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1751 n += n_D*pcbddc->local_primal_size; 1752 } 1753 if (!pcbddc->symmetric_primal) { 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1755 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1756 n = n_B*pcbddc->local_primal_size; 1757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1758 } 1759 } else { 1760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1761 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1763 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1764 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1765 } 1766 } 1767 } 1768 1769 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1770 p0_lidx_I = NULL; 1771 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1772 const PetscInt *idxs; 1773 1774 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1775 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1776 for (i=0;i<pcbddc->benign_n;i++) { 1777 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1778 } 1779 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1780 } 1781 1782 /* vertices */ 1783 if (n_vertices) { 1784 1785 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1786 1787 if (n_R) { 1788 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1789 PetscBLASInt B_N,B_one = 1; 1790 PetscScalar *x,*y; 1791 PetscBool isseqaij; 1792 1793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1794 if (need_benign_correction) { 1795 ISLocalToGlobalMapping RtoN; 1796 IS is_p0; 1797 PetscInt *idxs_p0,n; 1798 1799 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1800 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1801 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1802 if (n != pcbddc->benign_n) { 1803 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1804 } 1805 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1806 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1807 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1808 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1809 } 1810 1811 if (lda_rhs == n_R) { 1812 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1813 } else { 1814 PetscScalar *av,*array; 1815 const PetscInt *xadj,*adjncy; 1816 PetscInt n; 1817 PetscBool flg_row; 1818 1819 array = work+lda_rhs*n_vertices; 1820 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1821 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1822 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1823 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1824 for (i=0;i<n;i++) { 1825 PetscInt j; 1826 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1827 } 1828 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1829 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1831 } 1832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1833 if (need_benign_correction) { 1834 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1835 PetscScalar *marr; 1836 1837 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1838 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1839 1840 | 0 0 0 | (V) 1841 L = | 0 0 -1 | (P-p0) 1842 | 0 0 -1 | (p0) 1843 1844 */ 1845 for (i=0;i<reuse_solver->benign_n;i++) { 1846 const PetscScalar *vals; 1847 const PetscInt *idxs,*idxs_zero; 1848 PetscInt n,j,nz; 1849 1850 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1851 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1852 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1853 for (j=0;j<n;j++) { 1854 PetscScalar val = vals[j]; 1855 PetscInt k,col = idxs[j]; 1856 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1857 } 1858 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1859 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1860 } 1861 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1862 } 1863 if (F) { 1864 /* need to correct the rhs */ 1865 if (need_benign_correction) { 1866 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1867 PetscScalar *marr; 1868 1869 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1870 for (i=0;i<n_vertices;i++) { 1871 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1872 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1873 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1874 } 1875 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1876 } 1877 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1878 /* need to correct the solution */ 1879 if (need_benign_correction) { 1880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1881 PetscScalar *marr; 1882 1883 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1884 for (i=0;i<n_vertices;i++) { 1885 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1886 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1887 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1888 } 1889 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1890 } 1891 } else { 1892 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1893 for (i=0;i<n_vertices;i++) { 1894 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1895 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1899 } 1900 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1901 } 1902 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1903 /* S_VV and S_CV */ 1904 if (n_constraints) { 1905 Mat B; 1906 1907 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1908 for (i=0;i<n_vertices;i++) { 1909 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1910 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1911 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1912 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1915 } 1916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1917 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1918 ierr = MatDestroy(&B);CHKERRQ(ierr); 1919 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1920 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1921 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1923 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1924 ierr = MatDestroy(&B);CHKERRQ(ierr); 1925 } 1926 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1927 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1928 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1929 } 1930 if (lda_rhs != n_R) { 1931 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1933 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1934 } 1935 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1936 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1937 if (need_benign_correction) { 1938 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1939 PetscScalar *marr,*sums; 1940 1941 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1942 ierr = MatDenseGetArray(S_VVt,&marr); 1943 for (i=0;i<reuse_solver->benign_n;i++) { 1944 const PetscScalar *vals; 1945 const PetscInt *idxs,*idxs_zero; 1946 PetscInt n,j,nz; 1947 1948 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1949 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1950 for (j=0;j<n_vertices;j++) { 1951 PetscInt k; 1952 sums[j] = 0.; 1953 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1954 } 1955 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1956 for (j=0;j<n;j++) { 1957 PetscScalar val = vals[j]; 1958 PetscInt k; 1959 for (k=0;k<n_vertices;k++) { 1960 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1961 } 1962 } 1963 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1964 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1965 } 1966 ierr = PetscFree(sums);CHKERRQ(ierr); 1967 ierr = MatDenseRestoreArray(S_VVt,&marr); 1968 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1969 } 1970 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1972 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1973 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1974 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1975 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1976 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1977 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1978 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1979 } else { 1980 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1981 } 1982 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1983 1984 /* coarse basis functions */ 1985 for (i=0;i<n_vertices;i++) { 1986 PetscScalar *y; 1987 1988 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1989 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1990 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 y[n_B*i+idx_V_B[i]] = 1.0; 1994 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1996 1997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1998 PetscInt j; 1999 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2005 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2006 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2007 } 2008 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2009 } 2010 /* if n_R == 0 the object is not destroyed */ 2011 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2012 } 2013 2014 if (n_constraints) { 2015 Mat B; 2016 2017 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2018 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2019 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2020 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2021 if (n_vertices) { 2022 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2023 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2024 } else { 2025 Mat S_VCt; 2026 2027 if (lda_rhs != n_R) { 2028 ierr = MatDestroy(&B);CHKERRQ(ierr); 2029 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2030 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2031 } 2032 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2033 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2034 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2035 } 2036 } 2037 ierr = MatDestroy(&B);CHKERRQ(ierr); 2038 /* coarse basis functions */ 2039 for (i=0;i<n_constraints;i++) { 2040 PetscScalar *y; 2041 2042 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2043 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2044 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2045 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2046 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2048 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2049 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2050 PetscInt j; 2051 2052 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2053 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2055 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2056 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2057 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2058 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2059 } 2060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2061 } 2062 } 2063 if (n_constraints) { 2064 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2065 } 2066 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2067 2068 /* coarse matrix entries relative to B_0 */ 2069 if (pcbddc->benign_n) { 2070 Mat B0_B,B0_BPHI; 2071 IS is_dummy; 2072 PetscScalar *data; 2073 PetscInt j; 2074 2075 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2076 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2077 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2078 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2079 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2081 for (j=0;j<pcbddc->benign_n;j++) { 2082 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2083 for (i=0;i<pcbddc->local_primal_size;i++) { 2084 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2085 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2089 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2090 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2091 } 2092 2093 /* compute other basis functions for non-symmetric problems */ 2094 if (!pcbddc->symmetric_primal) { 2095 Mat B_V=NULL,B_C=NULL; 2096 PetscScalar *marray; 2097 2098 if (n_constraints) { 2099 Mat S_CCT,C_CRT; 2100 2101 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2102 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2103 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2104 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2105 if (n_vertices) { 2106 Mat S_VCT; 2107 2108 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2109 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2110 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2111 } 2112 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2113 } 2114 if (n_vertices && n_R) { 2115 PetscScalar *av,*marray; 2116 const PetscInt *xadj,*adjncy; 2117 PetscInt n; 2118 PetscBool flg_row; 2119 2120 /* B_V = B_V - A_VR^T */ 2121 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2122 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2123 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2124 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2125 for (i=0;i<n;i++) { 2126 PetscInt j; 2127 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2128 } 2129 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2130 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2131 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2132 } 2133 2134 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2135 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2136 for (i=0;i<n_vertices;i++) { 2137 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2138 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2139 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2140 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2141 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2142 } 2143 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2144 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2145 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2146 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2147 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2148 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2149 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2150 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2151 } 2152 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2153 /* coarse basis functions */ 2154 for (i=0;i<pcbddc->local_primal_size;i++) { 2155 PetscScalar *y; 2156 2157 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2159 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2160 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2161 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2162 if (i<n_vertices) { 2163 y[n_B*i+idx_V_B[i]] = 1.0; 2164 } 2165 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2166 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2167 2168 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2169 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2170 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2171 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2172 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2173 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2174 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2175 } 2176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2177 } 2178 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2179 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2180 } 2181 /* free memory */ 2182 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2183 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2184 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2185 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2186 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2187 ierr = PetscFree(work);CHKERRQ(ierr); 2188 if (n_vertices) { 2189 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2190 } 2191 if (n_constraints) { 2192 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2193 } 2194 /* Checking coarse_sub_mat and coarse basis functios */ 2195 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2196 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2197 if (pcbddc->dbg_flag) { 2198 Mat coarse_sub_mat; 2199 Mat AUXMAT,TM1,TM2,TM3,TM4; 2200 Mat coarse_phi_D,coarse_phi_B; 2201 Mat coarse_psi_D,coarse_psi_B; 2202 Mat A_II,A_BB,A_IB,A_BI; 2203 Mat C_B,CPHI; 2204 IS is_dummy; 2205 Vec mones; 2206 MatType checkmattype=MATSEQAIJ; 2207 PetscReal real_value; 2208 2209 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2210 Mat A; 2211 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2212 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2213 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2214 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2215 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2216 ierr = MatDestroy(&A);CHKERRQ(ierr); 2217 } else { 2218 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2219 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2220 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2221 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2222 } 2223 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2224 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2225 if (!pcbddc->symmetric_primal) { 2226 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2227 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2228 } 2229 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2230 2231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2234 if (!pcbddc->symmetric_primal) { 2235 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2236 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2237 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2238 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2239 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2240 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2241 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2242 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2243 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2244 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2245 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2246 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2247 } else { 2248 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2249 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2250 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2251 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2252 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2253 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2254 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2255 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2256 } 2257 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2258 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2259 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2260 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2261 if (pcbddc->benign_n) { 2262 Mat B0_B,B0_BPHI; 2263 PetscScalar *data,*data2; 2264 PetscInt j; 2265 2266 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2267 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2268 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2269 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2270 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2271 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2272 for (j=0;j<pcbddc->benign_n;j++) { 2273 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2274 for (i=0;i<pcbddc->local_primal_size;i++) { 2275 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2276 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2277 } 2278 } 2279 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2280 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2281 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2283 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2284 } 2285 #if 0 2286 { 2287 PetscViewer viewer; 2288 char filename[256]; 2289 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2290 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2291 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2292 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2293 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2294 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2295 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2296 if (save_change) { 2297 Mat phi_B; 2298 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2299 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2300 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2301 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2302 } else { 2303 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2304 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2305 } 2306 if (pcbddc->coarse_phi_D) { 2307 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2308 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2309 } 2310 if (pcbddc->coarse_psi_B) { 2311 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2312 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2313 } 2314 if (pcbddc->coarse_psi_D) { 2315 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2316 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2317 } 2318 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2319 } 2320 #endif 2321 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2322 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2323 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2325 2326 /* check constraints */ 2327 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2328 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2329 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2330 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2331 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2332 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2333 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2334 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2336 if (!pcbddc->symmetric_primal) { 2337 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2338 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2339 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2340 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2341 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2342 } 2343 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2344 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2345 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2346 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2347 } 2348 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2349 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2350 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2351 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2352 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2353 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2354 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2355 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2356 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2357 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2358 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2359 if (!pcbddc->symmetric_primal) { 2360 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2361 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2362 } 2363 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2364 } 2365 /* get back data */ 2366 *coarse_submat_vals_n = coarse_submat_vals; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2372 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2373 { 2374 Mat *work_mat; 2375 IS isrow_s,iscol_s; 2376 PetscBool rsorted,csorted; 2377 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2378 PetscErrorCode ierr; 2379 2380 PetscFunctionBegin; 2381 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2382 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2383 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2384 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2385 2386 if (!rsorted) { 2387 const PetscInt *idxs; 2388 PetscInt *idxs_sorted,i; 2389 2390 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2391 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2392 for (i=0;i<rsize;i++) { 2393 idxs_perm_r[i] = i; 2394 } 2395 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2396 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2397 for (i=0;i<rsize;i++) { 2398 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2399 } 2400 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2401 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2402 } else { 2403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2404 isrow_s = isrow; 2405 } 2406 2407 if (!csorted) { 2408 if (isrow == iscol) { 2409 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2410 iscol_s = isrow_s; 2411 } else { 2412 const PetscInt *idxs; 2413 PetscInt *idxs_sorted,i; 2414 2415 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2416 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2417 for (i=0;i<csize;i++) { 2418 idxs_perm_c[i] = i; 2419 } 2420 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2421 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2422 for (i=0;i<csize;i++) { 2423 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2424 } 2425 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2426 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2427 } 2428 } else { 2429 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2430 iscol_s = iscol; 2431 } 2432 2433 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2434 2435 if (!rsorted || !csorted) { 2436 Mat new_mat; 2437 IS is_perm_r,is_perm_c; 2438 2439 if (!rsorted) { 2440 PetscInt *idxs_r,i; 2441 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2442 for (i=0;i<rsize;i++) { 2443 idxs_r[idxs_perm_r[i]] = i; 2444 } 2445 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2447 } else { 2448 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2449 } 2450 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2451 2452 if (!csorted) { 2453 if (isrow_s == iscol_s) { 2454 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2455 is_perm_c = is_perm_r; 2456 } else { 2457 PetscInt *idxs_c,i; 2458 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_c[idxs_perm_c[i]] = i; 2461 } 2462 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2463 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2464 } 2465 } else { 2466 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2467 } 2468 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2469 2470 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2471 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2472 work_mat[0] = new_mat; 2473 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2474 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2475 } 2476 2477 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2478 *B = work_mat[0]; 2479 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2480 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2481 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2487 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2488 { 2489 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2490 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2491 Mat new_mat; 2492 IS is_local,is_global; 2493 PetscInt local_size; 2494 PetscBool isseqaij; 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2499 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2500 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2501 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2502 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2503 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2504 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2505 2506 /* check */ 2507 if (pcbddc->dbg_flag) { 2508 Vec x,x_change; 2509 PetscReal error; 2510 2511 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2512 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2513 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2514 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2515 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2517 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2519 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2520 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2523 ierr = VecDestroy(&x);CHKERRQ(ierr); 2524 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2525 } 2526 2527 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2528 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2529 if (isseqaij) { 2530 Mat M; 2531 2532 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2533 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2534 ierr = MatDestroy(&M);CHKERRQ(ierr); 2535 } else { 2536 Mat work_mat,M; 2537 2538 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2539 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2540 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2541 ierr = MatDestroy(&M);CHKERRQ(ierr); 2542 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2543 } 2544 if (matis->A->symmetric_set) { 2545 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2546 #if !defined(PETSC_USE_COMPLEX) 2547 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2548 #endif 2549 } 2550 /* 2551 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2552 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2553 */ 2554 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2560 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2561 { 2562 PC_IS* pcis = (PC_IS*)(pc->data); 2563 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2564 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2565 PetscInt *idx_R_local=NULL; 2566 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2567 PetscInt vbs,bs; 2568 PetscBT bitmask=NULL; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 /* 2573 No need to setup local scatters if 2574 - primal space is unchanged 2575 AND 2576 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2577 AND 2578 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2579 */ 2580 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2581 PetscFunctionReturn(0); 2582 } 2583 /* destroy old objects */ 2584 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2585 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2586 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2587 /* Set Non-overlapping dimensions */ 2588 n_B = pcis->n_B; 2589 n_D = pcis->n - n_B; 2590 n_vertices = pcbddc->n_vertices; 2591 2592 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2593 2594 /* create auxiliary bitmask and allocate workspace */ 2595 if (!sub_schurs->reuse_solver) { 2596 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2597 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2598 for (i=0;i<n_vertices;i++) { 2599 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2600 } 2601 2602 for (i=0, n_R=0; i<pcis->n; i++) { 2603 if (!PetscBTLookup(bitmask,i)) { 2604 idx_R_local[n_R++] = i; 2605 } 2606 } 2607 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2608 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2609 2610 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2612 } 2613 2614 /* Block code */ 2615 vbs = 1; 2616 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2617 if (bs>1 && !(n_vertices%bs)) { 2618 PetscBool is_blocked = PETSC_TRUE; 2619 PetscInt *vary; 2620 if (!sub_schurs->reuse_solver) { 2621 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2622 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2623 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2624 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2625 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2626 for (i=0; i<pcis->n/bs; i++) { 2627 if (vary[i]!=0 && vary[i]!=bs) { 2628 is_blocked = PETSC_FALSE; 2629 break; 2630 } 2631 } 2632 ierr = PetscFree(vary);CHKERRQ(ierr); 2633 } else { 2634 /* Verify directly the R set */ 2635 for (i=0; i<n_R/bs; i++) { 2636 PetscInt j,node=idx_R_local[bs*i]; 2637 for (j=1; j<bs; j++) { 2638 if (node != idx_R_local[bs*i+j]-j) { 2639 is_blocked = PETSC_FALSE; 2640 break; 2641 } 2642 } 2643 } 2644 } 2645 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2646 vbs = bs; 2647 for (i=0;i<n_R/vbs;i++) { 2648 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2649 } 2650 } 2651 } 2652 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2653 if (sub_schurs->reuse_solver) { 2654 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2655 2656 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2657 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2658 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2659 reuse_solver->is_R = pcbddc->is_R_local; 2660 } else { 2661 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2662 } 2663 2664 /* print some info if requested */ 2665 if (pcbddc->dbg_flag) { 2666 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2668 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2673 } 2674 2675 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2676 if (!sub_schurs->reuse_solver) { 2677 IS is_aux1,is_aux2; 2678 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2679 2680 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2681 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2682 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2683 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2684 for (i=0; i<n_D; i++) { 2685 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2686 } 2687 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2688 for (i=0, j=0; i<n_R; i++) { 2689 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2690 aux_array1[j++] = i; 2691 } 2692 } 2693 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2694 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2695 for (i=0, j=0; i<n_B; i++) { 2696 if (!PetscBTLookup(bitmask,is_indices[i])) { 2697 aux_array2[j++] = i; 2698 } 2699 } 2700 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2701 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2702 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2703 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2704 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2705 2706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2707 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2708 for (i=0, j=0; i<n_R; i++) { 2709 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2710 aux_array1[j++] = i; 2711 } 2712 } 2713 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2714 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2715 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2716 } 2717 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2718 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2719 } else { 2720 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2721 IS tis; 2722 PetscInt schur_size; 2723 2724 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2725 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2726 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2727 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2728 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2729 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2730 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2731 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2732 } 2733 } 2734 PetscFunctionReturn(0); 2735 } 2736 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2740 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2741 { 2742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2743 PC_IS *pcis = (PC_IS*)pc->data; 2744 PC pc_temp; 2745 Mat A_RR; 2746 MatReuse reuse; 2747 PetscScalar m_one = -1.0; 2748 PetscReal value; 2749 PetscInt n_D,n_R; 2750 PetscBool use_exact,use_exact_reduced,issbaij; 2751 PetscErrorCode ierr; 2752 /* prefixes stuff */ 2753 char dir_prefix[256],neu_prefix[256],str_level[16]; 2754 size_t len; 2755 2756 PetscFunctionBegin; 2757 2758 /* compute prefixes */ 2759 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2760 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2761 if (!pcbddc->current_level) { 2762 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2763 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2764 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2765 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2766 } else { 2767 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2768 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2769 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2770 len -= 15; /* remove "pc_bddc_coarse_" */ 2771 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2772 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2773 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2774 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2775 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2776 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2777 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2778 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2779 } 2780 2781 /* DIRICHLET PROBLEM */ 2782 if (dirichlet) { 2783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2784 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2785 if (!sub_schurs->reuse_solver) { 2786 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2787 } 2788 if (pcbddc->dbg_flag) { 2789 Mat A_IIn; 2790 2791 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2792 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2793 pcis->A_II = A_IIn; 2794 } 2795 } 2796 if (pcbddc->local_mat->symmetric_set) { 2797 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2798 } 2799 /* Matrix for Dirichlet problem is pcis->A_II */ 2800 n_D = pcis->n - pcis->n_B; 2801 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2802 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2803 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2804 /* default */ 2805 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2806 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2807 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2808 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2809 if (issbaij) { 2810 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2811 } else { 2812 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2813 } 2814 /* Allow user's customization */ 2815 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2816 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2817 } 2818 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2819 if (sub_schurs->reuse_solver) { 2820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2821 2822 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2823 } 2824 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2825 if (!n_D) { 2826 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2827 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2828 } 2829 /* Set Up KSP for Dirichlet problem of BDDC */ 2830 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2831 /* set ksp_D into pcis data */ 2832 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2833 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2834 pcis->ksp_D = pcbddc->ksp_D; 2835 } 2836 2837 /* NEUMANN PROBLEM */ 2838 A_RR = 0; 2839 if (neumann) { 2840 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2841 PetscInt ibs,mbs; 2842 PetscBool issbaij; 2843 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2844 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2845 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2846 if (pcbddc->ksp_R) { /* already created ksp */ 2847 PetscInt nn_R; 2848 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2849 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2850 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2851 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2852 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2853 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2854 reuse = MAT_INITIAL_MATRIX; 2855 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2856 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2857 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2858 reuse = MAT_INITIAL_MATRIX; 2859 } else { /* safe to reuse the matrix */ 2860 reuse = MAT_REUSE_MATRIX; 2861 } 2862 } 2863 /* last check */ 2864 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2865 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2866 reuse = MAT_INITIAL_MATRIX; 2867 } 2868 } else { /* first time, so we need to create the matrix */ 2869 reuse = MAT_INITIAL_MATRIX; 2870 } 2871 /* extract A_RR */ 2872 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2873 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2874 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2875 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2876 if (matis->A == pcbddc->local_mat) { 2877 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2878 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2879 } else { 2880 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2881 } 2882 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2883 if (matis->A == pcbddc->local_mat) { 2884 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2885 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2886 } else { 2887 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2888 } 2889 } 2890 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2891 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2892 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2893 } else { 2894 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2895 } 2896 if (pcbddc->local_mat->symmetric_set) { 2897 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2898 } 2899 if (!pcbddc->ksp_R) { /* create object if not present */ 2900 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2901 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2902 /* default */ 2903 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2904 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2905 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2906 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2907 if (issbaij) { 2908 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2909 } else { 2910 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2911 } 2912 /* Allow user's customization */ 2913 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2914 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2915 } 2916 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2917 if (!n_R) { 2918 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2919 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2920 } 2921 /* Reuse solver if it is present */ 2922 if (sub_schurs->reuse_solver) { 2923 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2924 2925 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2926 } 2927 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2928 /* Set Up KSP for Neumann problem of BDDC */ 2929 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2930 } 2931 /* free Neumann problem's matrix */ 2932 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2933 2934 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2935 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2938 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2939 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2940 } 2941 if (dirichlet) { /* Dirichlet */ 2942 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2943 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2944 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2945 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2946 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2947 /* need to be adapted? */ 2948 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2949 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2950 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2951 /* print info */ 2952 if (pcbddc->dbg_flag) { 2953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2955 } 2956 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2957 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2958 } 2959 } 2960 if (neumann) { /* Neumann */ 2961 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2962 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2963 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2964 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2965 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2966 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2967 /* need to be adapted? */ 2968 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2970 /* print info */ 2971 if (pcbddc->dbg_flag) { 2972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2974 } 2975 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2976 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2985 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2986 { 2987 PetscErrorCode ierr; 2988 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2989 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2990 2991 PetscFunctionBegin; 2992 if (!sub_schurs->reuse_solver) { 2993 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2994 } 2995 if (!pcbddc->switch_static) { 2996 if (applytranspose && pcbddc->local_auxmat1) { 2997 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2998 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2999 } 3000 if (!sub_schurs->reuse_solver) { 3001 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3002 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3003 } else { 3004 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3005 3006 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 } 3009 } else { 3010 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3011 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3012 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3013 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3014 if (applytranspose && pcbddc->local_auxmat1) { 3015 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3016 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3017 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3018 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3019 } 3020 } 3021 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3022 if (applytranspose) { 3023 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3024 } else { 3025 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3026 } 3027 } else { 3028 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3029 3030 if (applytranspose) { 3031 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3032 } else { 3033 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3034 } 3035 } 3036 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3037 if (!pcbddc->switch_static) { 3038 if (!sub_schurs->reuse_solver) { 3039 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3040 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3041 } else { 3042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3043 3044 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3045 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3046 } 3047 if (!applytranspose && pcbddc->local_auxmat1) { 3048 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3049 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3050 } 3051 } else { 3052 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 if (!applytranspose && pcbddc->local_auxmat1) { 3057 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3058 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3059 } 3060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3061 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 } 3065 PetscFunctionReturn(0); 3066 } 3067 3068 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3071 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3072 { 3073 PetscErrorCode ierr; 3074 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3075 PC_IS* pcis = (PC_IS*) (pc->data); 3076 const PetscScalar zero = 0.0; 3077 3078 PetscFunctionBegin; 3079 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3080 if (!pcbddc->benign_apply_coarse_only) { 3081 if (applytranspose) { 3082 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3083 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3084 } else { 3085 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3086 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3087 } 3088 } else { 3089 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3090 } 3091 3092 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3093 if (pcbddc->benign_n) { 3094 PetscScalar *array; 3095 PetscInt j; 3096 3097 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3098 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3099 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3100 } 3101 3102 /* start communications from local primal nodes to rhs of coarse solver */ 3103 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3104 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 3107 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3108 /* TODO remove null space when doing multilevel */ 3109 if (pcbddc->coarse_ksp) { 3110 Mat coarse_mat; 3111 Vec rhs,sol; 3112 MatNullSpace nullsp; 3113 3114 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3115 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3116 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3117 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3118 if (nullsp) { 3119 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3120 } 3121 if (applytranspose) { 3122 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3123 } else { 3124 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3125 } 3126 if (nullsp) { 3127 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3128 } 3129 } 3130 3131 /* Local solution on R nodes */ 3132 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3133 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3134 } 3135 /* communications from coarse sol to local primal nodes */ 3136 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3137 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3138 3139 /* Sum contributions from the two levels */ 3140 if (!pcbddc->benign_apply_coarse_only) { 3141 if (applytranspose) { 3142 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3143 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3144 } else { 3145 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3146 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3147 } 3148 /* store p0 */ 3149 if (pcbddc->benign_n) { 3150 PetscScalar *array; 3151 PetscInt j; 3152 3153 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3154 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3155 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3156 } 3157 } else { /* expand the coarse solution */ 3158 if (applytranspose) { 3159 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3160 } else { 3161 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3162 } 3163 } 3164 PetscFunctionReturn(0); 3165 } 3166 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3169 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3170 { 3171 PetscErrorCode ierr; 3172 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3173 PetscScalar *array; 3174 Vec from,to; 3175 3176 PetscFunctionBegin; 3177 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3178 from = pcbddc->coarse_vec; 3179 to = pcbddc->vec1_P; 3180 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3181 Vec tvec; 3182 3183 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3184 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3185 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3186 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3187 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3188 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3189 } 3190 } else { /* from local to global -> put data in coarse right hand side */ 3191 from = pcbddc->vec1_P; 3192 to = pcbddc->coarse_vec; 3193 } 3194 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3200 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3201 { 3202 PetscErrorCode ierr; 3203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3204 PetscScalar *array; 3205 Vec from,to; 3206 3207 PetscFunctionBegin; 3208 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3209 from = pcbddc->coarse_vec; 3210 to = pcbddc->vec1_P; 3211 } else { /* from local to global -> put data in coarse right hand side */ 3212 from = pcbddc->vec1_P; 3213 to = pcbddc->coarse_vec; 3214 } 3215 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3216 if (smode == SCATTER_FORWARD) { 3217 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3218 Vec tvec; 3219 3220 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3221 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3222 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3223 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3224 } 3225 } else { 3226 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3227 ierr = VecResetArray(from);CHKERRQ(ierr); 3228 } 3229 } 3230 PetscFunctionReturn(0); 3231 } 3232 3233 /* uncomment for testing purposes */ 3234 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3237 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3238 { 3239 PetscErrorCode ierr; 3240 PC_IS* pcis = (PC_IS*)(pc->data); 3241 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3243 /* one and zero */ 3244 PetscScalar one=1.0,zero=0.0; 3245 /* space to store constraints and their local indices */ 3246 PetscScalar *constraints_data; 3247 PetscInt *constraints_idxs,*constraints_idxs_B; 3248 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3249 PetscInt *constraints_n; 3250 /* iterators */ 3251 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3252 /* BLAS integers */ 3253 PetscBLASInt lwork,lierr; 3254 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3255 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3256 /* reuse */ 3257 PetscInt olocal_primal_size,olocal_primal_size_cc; 3258 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3259 /* change of basis */ 3260 PetscBool qr_needed; 3261 PetscBT change_basis,qr_needed_idx; 3262 /* auxiliary stuff */ 3263 PetscInt *nnz,*is_indices; 3264 PetscInt ncc; 3265 /* some quantities */ 3266 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3267 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3268 3269 PetscFunctionBegin; 3270 /* Destroy Mat objects computed previously */ 3271 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3272 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3273 /* save info on constraints from previous setup (if any) */ 3274 olocal_primal_size = pcbddc->local_primal_size; 3275 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3276 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3277 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3278 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3279 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3280 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3281 3282 /* print some info */ 3283 if (pcbddc->dbg_flag) { 3284 IS vertices; 3285 PetscInt nv,nedges,nfaces; 3286 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3287 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3288 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3289 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3295 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3296 } 3297 3298 if (!pcbddc->adaptive_selection) { 3299 IS ISForVertices,*ISForFaces,*ISForEdges; 3300 MatNullSpace nearnullsp; 3301 const Vec *nearnullvecs; 3302 Vec *localnearnullsp; 3303 PetscScalar *array; 3304 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3305 PetscBool nnsp_has_cnst; 3306 /* LAPACK working arrays for SVD or POD */ 3307 PetscBool skip_lapack,boolforchange; 3308 PetscScalar *work; 3309 PetscReal *singular_vals; 3310 #if defined(PETSC_USE_COMPLEX) 3311 PetscReal *rwork; 3312 #endif 3313 #if defined(PETSC_MISSING_LAPACK_GESVD) 3314 PetscScalar *temp_basis,*correlation_mat; 3315 #else 3316 PetscBLASInt dummy_int=1; 3317 PetscScalar dummy_scalar=1.; 3318 #endif 3319 3320 /* Get index sets for faces, edges and vertices from graph */ 3321 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3322 /* free unneeded index sets */ 3323 if (!pcbddc->use_vertices) { 3324 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3325 } 3326 if (!pcbddc->use_edges) { 3327 for (i=0;i<n_ISForEdges;i++) { 3328 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3329 } 3330 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3331 n_ISForEdges = 0; 3332 } 3333 if (!pcbddc->use_faces) { 3334 for (i=0;i<n_ISForFaces;i++) { 3335 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3336 } 3337 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3338 n_ISForFaces = 0; 3339 } 3340 3341 #if defined(PETSC_USE_DEBUG) 3342 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3343 Also use_change_of_basis should be consistent among processors */ 3344 if (pcbddc->NullSpace) { 3345 PetscBool tbool[2],gbool[2]; 3346 3347 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3348 pcbddc->use_change_of_basis = PETSC_TRUE; 3349 if (!ISForEdges) { 3350 pcbddc->use_change_on_faces = PETSC_TRUE; 3351 } 3352 } 3353 tbool[0] = pcbddc->use_change_of_basis; 3354 tbool[1] = pcbddc->use_change_on_faces; 3355 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3356 pcbddc->use_change_of_basis = gbool[0]; 3357 pcbddc->use_change_on_faces = gbool[1]; 3358 } 3359 #endif 3360 3361 /* check if near null space is attached to global mat */ 3362 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3363 if (nearnullsp) { 3364 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3365 /* remove any stored info */ 3366 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3367 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3368 /* store information for BDDC solver reuse */ 3369 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3370 pcbddc->onearnullspace = nearnullsp; 3371 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3372 for (i=0;i<nnsp_size;i++) { 3373 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3374 } 3375 } else { /* if near null space is not provided BDDC uses constants by default */ 3376 nnsp_size = 0; 3377 nnsp_has_cnst = PETSC_TRUE; 3378 } 3379 /* get max number of constraints on a single cc */ 3380 max_constraints = nnsp_size; 3381 if (nnsp_has_cnst) max_constraints++; 3382 3383 /* 3384 Evaluate maximum storage size needed by the procedure 3385 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3386 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3387 There can be multiple constraints per connected component 3388 */ 3389 n_vertices = 0; 3390 if (ISForVertices) { 3391 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3392 } 3393 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3394 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3395 3396 total_counts = n_ISForFaces+n_ISForEdges; 3397 total_counts *= max_constraints; 3398 total_counts += n_vertices; 3399 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3400 3401 total_counts = 0; 3402 max_size_of_constraint = 0; 3403 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3404 IS used_is; 3405 if (i<n_ISForEdges) { 3406 used_is = ISForEdges[i]; 3407 } else { 3408 used_is = ISForFaces[i-n_ISForEdges]; 3409 } 3410 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3411 total_counts += j; 3412 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3413 } 3414 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3415 3416 /* get local part of global near null space vectors */ 3417 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3418 for (k=0;k<nnsp_size;k++) { 3419 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3420 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 } 3423 3424 /* whether or not to skip lapack calls */ 3425 skip_lapack = PETSC_TRUE; 3426 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3427 3428 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3429 if (!skip_lapack) { 3430 PetscScalar temp_work; 3431 3432 #if defined(PETSC_MISSING_LAPACK_GESVD) 3433 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3434 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3435 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3436 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3437 #if defined(PETSC_USE_COMPLEX) 3438 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3439 #endif 3440 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3441 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3442 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3443 lwork = -1; 3444 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3445 #if !defined(PETSC_USE_COMPLEX) 3446 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3447 #else 3448 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3449 #endif 3450 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3451 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3452 #else /* on missing GESVD */ 3453 /* SVD */ 3454 PetscInt max_n,min_n; 3455 max_n = max_size_of_constraint; 3456 min_n = max_constraints; 3457 if (max_size_of_constraint < max_constraints) { 3458 min_n = max_size_of_constraint; 3459 max_n = max_constraints; 3460 } 3461 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3462 #if defined(PETSC_USE_COMPLEX) 3463 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3464 #endif 3465 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3466 lwork = -1; 3467 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3468 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3469 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3470 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3471 #if !defined(PETSC_USE_COMPLEX) 3472 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3473 #else 3474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3475 #endif 3476 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3478 #endif /* on missing GESVD */ 3479 /* Allocate optimal workspace */ 3480 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3482 } 3483 /* Now we can loop on constraining sets */ 3484 total_counts = 0; 3485 constraints_idxs_ptr[0] = 0; 3486 constraints_data_ptr[0] = 0; 3487 /* vertices */ 3488 if (n_vertices) { 3489 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3490 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3491 for (i=0;i<n_vertices;i++) { 3492 constraints_n[total_counts] = 1; 3493 constraints_data[total_counts] = 1.0; 3494 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3495 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3496 total_counts++; 3497 } 3498 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3499 n_vertices = total_counts; 3500 } 3501 3502 /* edges and faces */ 3503 total_counts_cc = total_counts; 3504 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3505 IS used_is; 3506 PetscBool idxs_copied = PETSC_FALSE; 3507 3508 if (ncc<n_ISForEdges) { 3509 used_is = ISForEdges[ncc]; 3510 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3511 } else { 3512 used_is = ISForFaces[ncc-n_ISForEdges]; 3513 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3514 } 3515 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3516 3517 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3518 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3519 /* change of basis should not be performed on local periodic nodes */ 3520 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3521 if (nnsp_has_cnst) { 3522 PetscScalar quad_value; 3523 3524 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3525 idxs_copied = PETSC_TRUE; 3526 3527 if (!pcbddc->use_nnsp_true) { 3528 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3529 } else { 3530 quad_value = 1.0; 3531 } 3532 for (j=0;j<size_of_constraint;j++) { 3533 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3534 } 3535 temp_constraints++; 3536 total_counts++; 3537 } 3538 for (k=0;k<nnsp_size;k++) { 3539 PetscReal real_value; 3540 PetscScalar *ptr_to_data; 3541 3542 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3543 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3544 for (j=0;j<size_of_constraint;j++) { 3545 ptr_to_data[j] = array[is_indices[j]]; 3546 } 3547 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3548 /* check if array is null on the connected component */ 3549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3550 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3551 if (real_value > 0.0) { /* keep indices and values */ 3552 temp_constraints++; 3553 total_counts++; 3554 if (!idxs_copied) { 3555 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3556 idxs_copied = PETSC_TRUE; 3557 } 3558 } 3559 } 3560 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3561 valid_constraints = temp_constraints; 3562 if (!pcbddc->use_nnsp_true && temp_constraints) { 3563 if (temp_constraints == 1) { /* just normalize the constraint */ 3564 PetscScalar norm,*ptr_to_data; 3565 3566 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3568 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3569 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3570 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3571 } else { /* perform SVD */ 3572 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3573 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3574 3575 #if defined(PETSC_MISSING_LAPACK_GESVD) 3576 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3577 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3578 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3579 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3580 from that computed using LAPACKgesvd 3581 -> This is due to a different computation of eigenvectors in LAPACKheev 3582 -> The quality of the POD-computed basis will be the same */ 3583 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3584 /* Store upper triangular part of correlation matrix */ 3585 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3587 for (j=0;j<temp_constraints;j++) { 3588 for (k=0;k<j+1;k++) { 3589 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3590 } 3591 } 3592 /* compute eigenvalues and eigenvectors of correlation matrix */ 3593 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3594 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3595 #if !defined(PETSC_USE_COMPLEX) 3596 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3597 #else 3598 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3599 #endif 3600 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3601 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3602 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3603 j = 0; 3604 while (j < temp_constraints && singular_vals[j] < tol) j++; 3605 total_counts = total_counts-j; 3606 valid_constraints = temp_constraints-j; 3607 /* scale and copy POD basis into used quadrature memory */ 3608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3609 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3610 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3611 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3612 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3614 if (j<temp_constraints) { 3615 PetscInt ii; 3616 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3617 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3618 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3619 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3620 for (k=0;k<temp_constraints-j;k++) { 3621 for (ii=0;ii<size_of_constraint;ii++) { 3622 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3623 } 3624 } 3625 } 3626 #else /* on missing GESVD */ 3627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3628 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3629 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3630 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3631 #if !defined(PETSC_USE_COMPLEX) 3632 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3633 #else 3634 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3635 #endif 3636 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3637 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3638 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3639 k = temp_constraints; 3640 if (k > size_of_constraint) k = size_of_constraint; 3641 j = 0; 3642 while (j < k && singular_vals[k-j-1] < tol) j++; 3643 valid_constraints = k-j; 3644 total_counts = total_counts-temp_constraints+valid_constraints; 3645 #endif /* on missing GESVD */ 3646 } 3647 } 3648 /* update pointers information */ 3649 if (valid_constraints) { 3650 constraints_n[total_counts_cc] = valid_constraints; 3651 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3652 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3653 /* set change_of_basis flag */ 3654 if (boolforchange) { 3655 PetscBTSet(change_basis,total_counts_cc); 3656 } 3657 total_counts_cc++; 3658 } 3659 } 3660 /* free workspace */ 3661 if (!skip_lapack) { 3662 ierr = PetscFree(work);CHKERRQ(ierr); 3663 #if defined(PETSC_USE_COMPLEX) 3664 ierr = PetscFree(rwork);CHKERRQ(ierr); 3665 #endif 3666 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3667 #if defined(PETSC_MISSING_LAPACK_GESVD) 3668 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3669 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3670 #endif 3671 } 3672 for (k=0;k<nnsp_size;k++) { 3673 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3674 } 3675 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3676 /* free index sets of faces, edges and vertices */ 3677 for (i=0;i<n_ISForFaces;i++) { 3678 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3679 } 3680 if (n_ISForFaces) { 3681 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3682 } 3683 for (i=0;i<n_ISForEdges;i++) { 3684 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3685 } 3686 if (n_ISForEdges) { 3687 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3688 } 3689 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3690 } else { 3691 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3692 3693 total_counts = 0; 3694 n_vertices = 0; 3695 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3696 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3697 } 3698 max_constraints = 0; 3699 total_counts_cc = 0; 3700 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3701 total_counts += pcbddc->adaptive_constraints_n[i]; 3702 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3703 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3704 } 3705 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3706 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3707 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3708 constraints_data = pcbddc->adaptive_constraints_data; 3709 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3710 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3711 total_counts_cc = 0; 3712 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3713 if (pcbddc->adaptive_constraints_n[i]) { 3714 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3715 } 3716 } 3717 #if 0 3718 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3719 for (i=0;i<total_counts_cc;i++) { 3720 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3721 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3722 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3723 printf(" %d",constraints_idxs[j]); 3724 } 3725 printf("\n"); 3726 printf("number of cc: %d\n",constraints_n[i]); 3727 } 3728 for (i=0;i<n_vertices;i++) { 3729 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3730 } 3731 for (i=0;i<sub_schurs->n_subs;i++) { 3732 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3733 } 3734 #endif 3735 3736 max_size_of_constraint = 0; 3737 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3738 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3739 /* Change of basis */ 3740 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3741 if (pcbddc->use_change_of_basis) { 3742 for (i=0;i<sub_schurs->n_subs;i++) { 3743 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3744 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3745 } 3746 } 3747 } 3748 } 3749 pcbddc->local_primal_size = total_counts; 3750 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3751 3752 /* map constraints_idxs in boundary numbering */ 3753 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3754 if (i != constraints_idxs_ptr[total_counts_cc]) { 3755 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3756 } 3757 3758 /* Create constraint matrix */ 3759 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3760 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3761 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3762 3763 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3764 /* determine if a QR strategy is needed for change of basis */ 3765 qr_needed = PETSC_FALSE; 3766 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3767 total_primal_vertices=0; 3768 pcbddc->local_primal_size_cc = 0; 3769 for (i=0;i<total_counts_cc;i++) { 3770 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3771 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3772 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3773 pcbddc->local_primal_size_cc += 1; 3774 } else if (PetscBTLookup(change_basis,i)) { 3775 for (k=0;k<constraints_n[i];k++) { 3776 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3777 } 3778 pcbddc->local_primal_size_cc += constraints_n[i]; 3779 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3780 PetscBTSet(qr_needed_idx,i); 3781 qr_needed = PETSC_TRUE; 3782 } 3783 } else { 3784 pcbddc->local_primal_size_cc += 1; 3785 } 3786 } 3787 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3788 pcbddc->n_vertices = total_primal_vertices; 3789 /* permute indices in order to have a sorted set of vertices */ 3790 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3791 3792 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3793 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3794 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3795 3796 /* nonzero structure of constraint matrix */ 3797 /* and get reference dof for local constraints */ 3798 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3799 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3800 3801 j = total_primal_vertices; 3802 total_counts = total_primal_vertices; 3803 cum = total_primal_vertices; 3804 for (i=n_vertices;i<total_counts_cc;i++) { 3805 if (!PetscBTLookup(change_basis,i)) { 3806 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3807 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3808 cum++; 3809 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3810 for (k=0;k<constraints_n[i];k++) { 3811 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3812 nnz[j+k] = size_of_constraint; 3813 } 3814 j += constraints_n[i]; 3815 } 3816 } 3817 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3818 ierr = PetscFree(nnz);CHKERRQ(ierr); 3819 3820 /* set values in constraint matrix */ 3821 for (i=0;i<total_primal_vertices;i++) { 3822 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3823 } 3824 total_counts = total_primal_vertices; 3825 for (i=n_vertices;i<total_counts_cc;i++) { 3826 if (!PetscBTLookup(change_basis,i)) { 3827 PetscInt *cols; 3828 3829 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3830 cols = constraints_idxs+constraints_idxs_ptr[i]; 3831 for (k=0;k<constraints_n[i];k++) { 3832 PetscInt row = total_counts+k; 3833 PetscScalar *vals; 3834 3835 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3836 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3837 } 3838 total_counts += constraints_n[i]; 3839 } 3840 } 3841 /* assembling */ 3842 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 3845 /* 3846 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3847 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3848 */ 3849 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3850 if (pcbddc->use_change_of_basis) { 3851 /* dual and primal dofs on a single cc */ 3852 PetscInt dual_dofs,primal_dofs; 3853 /* working stuff for GEQRF */ 3854 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3855 PetscBLASInt lqr_work; 3856 /* working stuff for UNGQR */ 3857 PetscScalar *gqr_work,lgqr_work_t; 3858 PetscBLASInt lgqr_work; 3859 /* working stuff for TRTRS */ 3860 PetscScalar *trs_rhs; 3861 PetscBLASInt Blas_NRHS; 3862 /* pointers for values insertion into change of basis matrix */ 3863 PetscInt *start_rows,*start_cols; 3864 PetscScalar *start_vals; 3865 /* working stuff for values insertion */ 3866 PetscBT is_primal; 3867 PetscInt *aux_primal_numbering_B; 3868 /* matrix sizes */ 3869 PetscInt global_size,local_size; 3870 /* temporary change of basis */ 3871 Mat localChangeOfBasisMatrix; 3872 /* extra space for debugging */ 3873 PetscScalar *dbg_work; 3874 3875 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3876 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3877 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3878 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3879 /* nonzeros for local mat */ 3880 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3881 for (i=0;i<pcis->n;i++) nnz[i]=1; 3882 for (i=n_vertices;i<total_counts_cc;i++) { 3883 if (PetscBTLookup(change_basis,i)) { 3884 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3885 if (PetscBTLookup(qr_needed_idx,i)) { 3886 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3887 } else { 3888 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3889 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3890 } 3891 } 3892 } 3893 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3894 ierr = PetscFree(nnz);CHKERRQ(ierr); 3895 /* Set initial identity in the matrix */ 3896 for (i=0;i<pcis->n;i++) { 3897 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3898 } 3899 3900 if (pcbddc->dbg_flag) { 3901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3903 } 3904 3905 3906 /* Now we loop on the constraints which need a change of basis */ 3907 /* 3908 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3909 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3910 3911 Basic blocks of change of basis matrix T computed by 3912 3913 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3914 3915 | 1 0 ... 0 s_1/S | 3916 | 0 1 ... 0 s_2/S | 3917 | ... | 3918 | 0 ... 1 s_{n-1}/S | 3919 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3920 3921 with S = \sum_{i=1}^n s_i^2 3922 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3923 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3924 3925 - QR decomposition of constraints otherwise 3926 */ 3927 if (qr_needed) { 3928 /* space to store Q */ 3929 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3930 /* first we issue queries for optimal work */ 3931 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3932 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3933 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3934 lqr_work = -1; 3935 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3936 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3937 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3938 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3939 lgqr_work = -1; 3940 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3941 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3944 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3945 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3946 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3947 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3948 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3949 /* array to store scaling factors for reflectors */ 3950 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3951 /* array to store rhs and solution of triangular solver */ 3952 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3953 /* allocating workspace for check */ 3954 if (pcbddc->dbg_flag) { 3955 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3956 } 3957 } 3958 /* array to store whether a node is primal or not */ 3959 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3960 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3961 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3962 if (i != total_primal_vertices) { 3963 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3964 } 3965 for (i=0;i<total_primal_vertices;i++) { 3966 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3967 } 3968 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3969 3970 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3971 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3972 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3973 if (PetscBTLookup(change_basis,total_counts)) { 3974 /* get constraint info */ 3975 primal_dofs = constraints_n[total_counts]; 3976 dual_dofs = size_of_constraint-primal_dofs; 3977 3978 if (pcbddc->dbg_flag) { 3979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3980 } 3981 3982 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3983 3984 /* copy quadrature constraints for change of basis check */ 3985 if (pcbddc->dbg_flag) { 3986 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3987 } 3988 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3989 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3990 3991 /* compute QR decomposition of constraints */ 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3996 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3997 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3998 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3999 4000 /* explictly compute R^-T */ 4001 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4002 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4003 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4006 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4007 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4008 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4009 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4011 4012 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4015 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4016 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4017 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4018 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4019 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4021 4022 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4023 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4024 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4025 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4026 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4027 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4029 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4031 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4032 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4033 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4034 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4035 4036 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4037 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4038 /* insert cols for primal dofs */ 4039 for (j=0;j<primal_dofs;j++) { 4040 start_vals = &qr_basis[j*size_of_constraint]; 4041 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4042 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4043 } 4044 /* insert cols for dual dofs */ 4045 for (j=0,k=0;j<dual_dofs;k++) { 4046 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4047 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4048 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4049 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4050 j++; 4051 } 4052 } 4053 4054 /* check change of basis */ 4055 if (pcbddc->dbg_flag) { 4056 PetscInt ii,jj; 4057 PetscBool valid_qr=PETSC_TRUE; 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4063 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4064 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4065 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4066 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4067 for (jj=0;jj<size_of_constraint;jj++) { 4068 for (ii=0;ii<primal_dofs;ii++) { 4069 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4070 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4071 } 4072 } 4073 if (!valid_qr) { 4074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4075 for (jj=0;jj<size_of_constraint;jj++) { 4076 for (ii=0;ii<primal_dofs;ii++) { 4077 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4078 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4079 } 4080 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4081 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4082 } 4083 } 4084 } 4085 } else { 4086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4087 } 4088 } 4089 } else { /* simple transformation block */ 4090 PetscInt row,col; 4091 PetscScalar val,norm; 4092 4093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4094 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4095 for (j=0;j<size_of_constraint;j++) { 4096 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4097 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4098 if (!PetscBTLookup(is_primal,row_B)) { 4099 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4100 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4101 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4102 } else { 4103 for (k=0;k<size_of_constraint;k++) { 4104 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4105 if (row != col) { 4106 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4107 } else { 4108 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4109 } 4110 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4111 } 4112 } 4113 } 4114 if (pcbddc->dbg_flag) { 4115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4116 } 4117 } 4118 } else { 4119 if (pcbddc->dbg_flag) { 4120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4121 } 4122 } 4123 } 4124 4125 /* free workspace */ 4126 if (qr_needed) { 4127 if (pcbddc->dbg_flag) { 4128 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4129 } 4130 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4131 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4132 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4133 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4134 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4135 } 4136 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4137 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 4140 /* assembling of global change of variable */ 4141 if (!pcbddc->fake_change) { 4142 Mat tmat; 4143 PetscInt bs; 4144 4145 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4146 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4147 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4148 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4149 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4150 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4151 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4152 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4153 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4154 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4155 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4156 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4157 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4158 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4159 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4160 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4161 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4162 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4163 4164 /* check */ 4165 if (pcbddc->dbg_flag) { 4166 PetscReal error; 4167 Vec x,x_change; 4168 4169 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4170 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4171 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4172 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4173 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4174 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4175 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4176 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4177 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4178 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4179 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4180 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4183 ierr = VecDestroy(&x);CHKERRQ(ierr); 4184 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4185 } 4186 /* adapt sub_schurs computed (if any) */ 4187 if (pcbddc->use_deluxe_scaling) { 4188 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4189 4190 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4191 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4192 } 4193 if (sub_schurs->S_Ej_all) { 4194 Mat S_new,tmat; 4195 IS is_all_N,is_V_Sall = NULL; 4196 4197 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4198 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4199 if (pcbddc->deluxe_zerorows) { 4200 ISLocalToGlobalMapping NtoSall; 4201 IS is_V; 4202 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4203 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4204 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4205 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4206 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4207 } 4208 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4209 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4210 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4211 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4212 if (pcbddc->deluxe_zerorows) { 4213 const PetscScalar *array; 4214 const PetscInt *idxs_V,*idxs_all; 4215 PetscInt i,n_V; 4216 4217 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4218 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4219 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4220 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4221 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4222 for (i=0;i<n_V;i++) { 4223 PetscScalar val; 4224 PetscInt idx; 4225 4226 idx = idxs_V[i]; 4227 val = array[idxs_all[idxs_V[i]]]; 4228 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4229 } 4230 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4231 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4232 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4233 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4234 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4235 } 4236 sub_schurs->S_Ej_all = S_new; 4237 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4238 if (sub_schurs->sum_S_Ej_all) { 4239 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4240 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4241 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4242 if (pcbddc->deluxe_zerorows) { 4243 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4244 } 4245 sub_schurs->sum_S_Ej_all = S_new; 4246 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4247 } 4248 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4249 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4250 } 4251 } 4252 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4253 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4254 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4255 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4256 pcbddc->use_qr_single = qr_needed; 4257 } 4258 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4259 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4260 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4261 } 4262 4263 /* set up change of basis context */ 4264 if (pcbddc->ChangeOfBasisMatrix) { 4265 PCBDDCChange_ctx change_ctx; 4266 4267 if (!pcbddc->new_global_mat) { 4268 PetscInt global_size,local_size; 4269 4270 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4271 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4272 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4273 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4274 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4275 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4276 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4277 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4278 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4279 } else { 4280 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4281 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4282 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4283 } 4284 if (!pcbddc->user_ChangeOfBasisMatrix) { 4285 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4286 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4287 } else { 4288 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4289 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4290 } 4291 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4292 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4293 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4294 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4295 } else { 4296 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4297 } 4298 4299 if (!pcbddc->fake_change) { 4300 /* add pressure dofs to set of primal nodes for numbering purposes */ 4301 for (i=0;i<pcbddc->benign_n;i++) { 4302 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4303 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4304 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4305 pcbddc->local_primal_size_cc++; 4306 pcbddc->local_primal_size++; 4307 } 4308 4309 /* check if a new primal space has been introduced (also take into account benign trick) */ 4310 pcbddc->new_primal_space_local = PETSC_TRUE; 4311 if (olocal_primal_size == pcbddc->local_primal_size) { 4312 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4313 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4314 if (!pcbddc->new_primal_space_local) { 4315 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4316 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4317 } 4318 } 4319 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4320 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4321 } 4322 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4323 4324 /* flush dbg viewer */ 4325 if (pcbddc->dbg_flag) { 4326 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4327 } 4328 4329 /* free workspace */ 4330 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4331 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4332 if (!pcbddc->adaptive_selection) { 4333 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4334 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4335 } else { 4336 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4337 pcbddc->adaptive_constraints_idxs_ptr, 4338 pcbddc->adaptive_constraints_data_ptr, 4339 pcbddc->adaptive_constraints_idxs, 4340 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4341 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4342 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4343 } 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4349 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4350 { 4351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4352 PC_IS *pcis = (PC_IS*)pc->data; 4353 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4354 PetscInt ierr,i,N; 4355 PetscViewer viewer=pcbddc->dbg_viewer; 4356 4357 PetscFunctionBegin; 4358 /* Reset previously computed graph */ 4359 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4360 /* Init local Graph struct */ 4361 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4362 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4363 4364 /* Check validity of the csr graph passed in by the user */ 4365 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4366 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4367 } 4368 4369 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4370 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4371 PetscInt *xadj,*adjncy; 4372 PetscInt nvtxs; 4373 PetscBool flg_row=PETSC_FALSE; 4374 4375 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4376 if (flg_row) { 4377 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4378 pcbddc->computed_rowadj = PETSC_TRUE; 4379 } 4380 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4381 } 4382 if (pcbddc->dbg_flag) { 4383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4384 } 4385 4386 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4387 if (pcbddc->user_provided_isfordofs) { 4388 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4389 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4390 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4391 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4392 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4393 } 4394 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4395 pcbddc->n_ISForDofs = 0; 4396 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4397 } 4398 } else { 4399 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4400 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4401 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4402 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4403 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4404 } 4405 } 4406 } 4407 4408 /* Setup of Graph */ 4409 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4410 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4411 } 4412 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4413 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4414 } 4415 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4416 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4417 } 4418 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4419 4420 /* attach info on disconnected subdomains if present */ 4421 if (pcbddc->n_local_subs) { 4422 PetscInt *local_subs; 4423 4424 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4425 for (i=0;i<pcbddc->n_local_subs;i++) { 4426 const PetscInt *idxs; 4427 PetscInt nl,j; 4428 4429 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4430 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4431 for (j=0;j<nl;j++) { 4432 local_subs[idxs[j]] = i; 4433 } 4434 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4435 } 4436 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4437 pcbddc->mat_graph->local_subs = local_subs; 4438 } 4439 4440 /* Graph's connected components analysis */ 4441 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4442 4443 /* print some info to stdout */ 4444 if (pcbddc->dbg_flag) { 4445 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4446 } 4447 PetscFunctionReturn(0); 4448 } 4449 4450 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "PCBDDCSubsetNumbering" 4453 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4454 { 4455 PetscSF sf; 4456 PetscLayout map; 4457 const PetscInt *idxs; 4458 PetscInt *leaf_data,*root_data,*gidxs; 4459 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4460 PetscInt n_n,nlocals,start,first_index; 4461 PetscMPIInt commsize; 4462 PetscBool first_found; 4463 PetscErrorCode ierr; 4464 4465 PetscFunctionBegin; 4466 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4467 if (subset_mult) { 4468 PetscCheckSameComm(subset,1,subset_mult,2); 4469 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4470 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4471 } 4472 /* create workspace layout for computing global indices of subset */ 4473 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4474 lbounds[0] = lbounds[1] = 0; 4475 for (i=0;i<n;i++) { 4476 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4477 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4478 } 4479 lbounds[0] = -lbounds[0]; 4480 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4481 gbounds[0] = -gbounds[0]; 4482 N = gbounds[1] - gbounds[0] + 1; 4483 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4484 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4485 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4486 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4487 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4488 4489 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4490 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4491 if (subset_mult) { 4492 const PetscInt* idxs_mult; 4493 4494 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4495 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4496 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4497 } else { 4498 for (i=0;i<n;i++) leaf_data[i] = 1; 4499 } 4500 /* local size of new subset */ 4501 n_n = 0; 4502 for (i=0;i<n;i++) n_n += leaf_data[i]; 4503 4504 /* global indexes in layout */ 4505 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4506 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4507 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4508 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4509 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4510 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4511 4512 /* reduce from leaves to roots */ 4513 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4514 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4515 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4516 4517 /* count indexes in local part of layout */ 4518 nlocals = 0; 4519 first_index = -1; 4520 first_found = PETSC_FALSE; 4521 for (i=0;i<Nl;i++) { 4522 if (!first_found && root_data[i]) { 4523 first_found = PETSC_TRUE; 4524 first_index = i; 4525 } 4526 nlocals += root_data[i]; 4527 } 4528 4529 /* cumulative of number of indexes and size of subset without holes */ 4530 #if defined(PETSC_HAVE_MPI_EXSCAN) 4531 start = 0; 4532 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4533 #else 4534 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4535 start = start-nlocals; 4536 #endif 4537 4538 if (N_n) { /* compute total size of new subset if requested */ 4539 *N_n = start + nlocals; 4540 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4541 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4542 } 4543 4544 /* adapt root data with cumulative */ 4545 if (first_found) { 4546 PetscInt old_index; 4547 4548 root_data[first_index] += start; 4549 old_index = first_index; 4550 for (i=first_index+1;i<Nl;i++) { 4551 if (root_data[i]) { 4552 root_data[i] += root_data[old_index]; 4553 old_index = i; 4554 } 4555 } 4556 } 4557 4558 /* from roots to leaves */ 4559 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4560 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4561 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4562 4563 /* create new IS with global indexes without holes */ 4564 if (subset_mult) { 4565 const PetscInt* idxs_mult; 4566 PetscInt cum; 4567 4568 cum = 0; 4569 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4570 for (i=0;i<n;i++) { 4571 PetscInt j; 4572 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4573 } 4574 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4575 } else { 4576 for (i=0;i<n;i++) { 4577 gidxs[i] = leaf_data[i]-1; 4578 } 4579 } 4580 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4581 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4582 PetscFunctionReturn(0); 4583 } 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4587 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4588 { 4589 PetscInt i,j; 4590 PetscScalar *alphas; 4591 PetscErrorCode ierr; 4592 4593 PetscFunctionBegin; 4594 /* this implements stabilized Gram-Schmidt */ 4595 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4596 for (i=0;i<n;i++) { 4597 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4598 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4599 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4600 } 4601 ierr = PetscFree(alphas);CHKERRQ(ierr); 4602 PetscFunctionReturn(0); 4603 } 4604 4605 #undef __FUNCT__ 4606 #define __FUNCT__ "MatISGetSubassemblingPattern" 4607 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4608 { 4609 IS ranks_send_to; 4610 PetscInt n_neighs,*neighs,*n_shared,**shared; 4611 PetscMPIInt size,rank,color; 4612 PetscInt *xadj,*adjncy; 4613 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4614 PetscInt i,local_size,threshold=0; 4615 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4616 PetscSubcomm subcomm; 4617 PetscErrorCode ierr; 4618 4619 PetscFunctionBegin; 4620 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4621 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4622 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4623 4624 /* Get info on mapping */ 4625 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4626 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4627 4628 /* build local CSR graph of subdomains' connectivity */ 4629 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4630 xadj[0] = 0; 4631 xadj[1] = PetscMax(n_neighs-1,0); 4632 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4634 4635 if (threshold) { 4636 PetscInt xadj_count = 0; 4637 for (i=1;i<n_neighs;i++) { 4638 if (n_shared[i] > threshold) { 4639 adjncy[xadj_count] = neighs[i]; 4640 adjncy_wgt[xadj_count] = n_shared[i]; 4641 xadj_count++; 4642 } 4643 } 4644 xadj[1] = xadj_count; 4645 } else { 4646 if (xadj[1]) { 4647 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4648 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4649 } 4650 } 4651 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4652 if (use_square) { 4653 for (i=0;i<xadj[1];i++) { 4654 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4655 } 4656 } 4657 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4658 4659 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4660 4661 /* 4662 Restrict work on active processes only. 4663 */ 4664 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4665 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4666 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4667 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4668 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4669 if (color) { 4670 ierr = PetscFree(xadj);CHKERRQ(ierr); 4671 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4672 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4673 } else { 4674 Mat subdomain_adj; 4675 IS new_ranks,new_ranks_contig; 4676 MatPartitioning partitioner; 4677 PetscInt prank,rstart=0,rend=0; 4678 PetscInt *is_indices,*oldranks; 4679 PetscBool aggregate; 4680 4681 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4682 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4683 prank = rank; 4684 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4685 /* 4686 for (i=0;i<size;i++) { 4687 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4688 } 4689 */ 4690 for (i=0;i<xadj[1];i++) { 4691 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4692 } 4693 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4694 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4695 if (aggregate) { 4696 PetscInt lrows,row,ncols,*cols; 4697 PetscMPIInt nrank; 4698 PetscScalar *vals; 4699 4700 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4701 lrows = 0; 4702 if (nrank<redprocs) { 4703 lrows = size/redprocs; 4704 if (nrank<size%redprocs) lrows++; 4705 } 4706 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4707 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4708 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4709 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4710 row = nrank; 4711 ncols = xadj[1]-xadj[0]; 4712 cols = adjncy; 4713 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4714 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4715 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4716 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4717 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4718 ierr = PetscFree(xadj);CHKERRQ(ierr); 4719 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4720 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4721 ierr = PetscFree(vals);CHKERRQ(ierr); 4722 } else { 4723 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4724 } 4725 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4726 4727 /* Partition */ 4728 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4729 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4730 if (use_vwgt) { 4731 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4732 v_wgt[0] = local_size; 4733 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4734 } 4735 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4736 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4737 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4738 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4739 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4740 4741 /* renumber new_ranks to avoid "holes" in new set of processors */ 4742 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4743 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4744 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4745 if (!redprocs) { 4746 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4747 } else { 4748 PetscInt idxs[1]; 4749 PetscMPIInt tag; 4750 MPI_Request *reqs; 4751 4752 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4753 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4754 for (i=rstart;i<rend;i++) { 4755 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4756 } 4757 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4758 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4759 ierr = PetscFree(reqs);CHKERRQ(ierr); 4760 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4761 } 4762 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4763 /* clean up */ 4764 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4765 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4766 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4767 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4768 } 4769 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4770 4771 /* assemble parallel IS for sends */ 4772 i = 1; 4773 if (color) i=0; 4774 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4775 /* get back IS */ 4776 *is_sends = ranks_send_to; 4777 PetscFunctionReturn(0); 4778 } 4779 4780 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4781 4782 #undef __FUNCT__ 4783 #define __FUNCT__ "MatISSubassemble" 4784 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4785 { 4786 Mat local_mat; 4787 IS is_sends_internal; 4788 PetscInt rows,cols,new_local_rows; 4789 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4790 PetscBool ismatis,isdense,newisdense,destroy_mat; 4791 ISLocalToGlobalMapping l2gmap; 4792 PetscInt* l2gmap_indices; 4793 const PetscInt* is_indices; 4794 MatType new_local_type; 4795 /* buffers */ 4796 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4797 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4798 PetscInt *recv_buffer_idxs_local; 4799 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4800 /* MPI */ 4801 MPI_Comm comm,comm_n; 4802 PetscSubcomm subcomm; 4803 PetscMPIInt n_sends,n_recvs,commsize; 4804 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4805 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4806 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4807 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4808 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4809 PetscErrorCode ierr; 4810 4811 PetscFunctionBegin; 4812 /* TODO: add missing checks */ 4813 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4814 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4815 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4816 PetscValidLogicalCollectiveInt(mat,nis,7); 4817 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4818 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4819 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4820 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4821 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4822 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4823 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4824 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4825 PetscInt mrows,mcols,mnrows,mncols; 4826 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4827 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4828 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4829 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4830 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4831 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4832 } 4833 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4834 PetscValidLogicalCollectiveInt(mat,bs,0); 4835 /* prepare IS for sending if not provided */ 4836 if (!is_sends) { 4837 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4838 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4839 } else { 4840 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4841 is_sends_internal = is_sends; 4842 } 4843 4844 /* get comm */ 4845 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4846 4847 /* compute number of sends */ 4848 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4849 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4850 4851 /* compute number of receives */ 4852 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4853 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4854 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4855 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4856 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4857 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4858 ierr = PetscFree(iflags);CHKERRQ(ierr); 4859 4860 /* restrict comm if requested */ 4861 subcomm = 0; 4862 destroy_mat = PETSC_FALSE; 4863 if (restrict_comm) { 4864 PetscMPIInt color,subcommsize; 4865 4866 color = 0; 4867 if (restrict_full) { 4868 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4869 } else { 4870 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4871 } 4872 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4873 subcommsize = commsize - subcommsize; 4874 /* check if reuse has been requested */ 4875 if (reuse == MAT_REUSE_MATRIX) { 4876 if (*mat_n) { 4877 PetscMPIInt subcommsize2; 4878 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4879 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4880 comm_n = PetscObjectComm((PetscObject)*mat_n); 4881 } else { 4882 comm_n = PETSC_COMM_SELF; 4883 } 4884 } else { /* MAT_INITIAL_MATRIX */ 4885 PetscMPIInt rank; 4886 4887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4888 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4889 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4890 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4891 comm_n = PetscSubcommChild(subcomm); 4892 } 4893 /* flag to destroy *mat_n if not significative */ 4894 if (color) destroy_mat = PETSC_TRUE; 4895 } else { 4896 comm_n = comm; 4897 } 4898 4899 /* prepare send/receive buffers */ 4900 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4901 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4902 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4903 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4904 if (nis) { 4905 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4906 } 4907 4908 /* Get data from local matrices */ 4909 if (!isdense) { 4910 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4911 /* TODO: See below some guidelines on how to prepare the local buffers */ 4912 /* 4913 send_buffer_vals should contain the raw values of the local matrix 4914 send_buffer_idxs should contain: 4915 - MatType_PRIVATE type 4916 - PetscInt size_of_l2gmap 4917 - PetscInt global_row_indices[size_of_l2gmap] 4918 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4919 */ 4920 } else { 4921 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4922 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4923 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4924 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4925 send_buffer_idxs[1] = i; 4926 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4927 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4928 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4929 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4930 for (i=0;i<n_sends;i++) { 4931 ilengths_vals[is_indices[i]] = len*len; 4932 ilengths_idxs[is_indices[i]] = len+2; 4933 } 4934 } 4935 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4936 /* additional is (if any) */ 4937 if (nis) { 4938 PetscMPIInt psum; 4939 PetscInt j; 4940 for (j=0,psum=0;j<nis;j++) { 4941 PetscInt plen; 4942 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4943 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4944 psum += len+1; /* indices + lenght */ 4945 } 4946 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4947 for (j=0,psum=0;j<nis;j++) { 4948 PetscInt plen; 4949 const PetscInt *is_array_idxs; 4950 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4951 send_buffer_idxs_is[psum] = plen; 4952 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4953 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4954 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4955 psum += plen+1; /* indices + lenght */ 4956 } 4957 for (i=0;i<n_sends;i++) { 4958 ilengths_idxs_is[is_indices[i]] = psum; 4959 } 4960 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4961 } 4962 4963 buf_size_idxs = 0; 4964 buf_size_vals = 0; 4965 buf_size_idxs_is = 0; 4966 for (i=0;i<n_recvs;i++) { 4967 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4968 buf_size_vals += (PetscInt)olengths_vals[i]; 4969 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4970 } 4971 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4972 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4973 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4974 4975 /* get new tags for clean communications */ 4976 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4977 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4978 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4979 4980 /* allocate for requests */ 4981 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4982 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4983 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4984 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4985 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4986 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4987 4988 /* communications */ 4989 ptr_idxs = recv_buffer_idxs; 4990 ptr_vals = recv_buffer_vals; 4991 ptr_idxs_is = recv_buffer_idxs_is; 4992 for (i=0;i<n_recvs;i++) { 4993 source_dest = onodes[i]; 4994 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4995 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4996 ptr_idxs += olengths_idxs[i]; 4997 ptr_vals += olengths_vals[i]; 4998 if (nis) { 4999 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5000 ptr_idxs_is += olengths_idxs_is[i]; 5001 } 5002 } 5003 for (i=0;i<n_sends;i++) { 5004 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5005 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5006 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5007 if (nis) { 5008 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5009 } 5010 } 5011 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5012 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5013 5014 /* assemble new l2g map */ 5015 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5016 ptr_idxs = recv_buffer_idxs; 5017 new_local_rows = 0; 5018 for (i=0;i<n_recvs;i++) { 5019 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5020 ptr_idxs += olengths_idxs[i]; 5021 } 5022 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5023 ptr_idxs = recv_buffer_idxs; 5024 new_local_rows = 0; 5025 for (i=0;i<n_recvs;i++) { 5026 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5027 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5028 ptr_idxs += olengths_idxs[i]; 5029 } 5030 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5031 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5032 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5033 5034 /* infer new local matrix type from received local matrices type */ 5035 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5036 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5037 if (n_recvs) { 5038 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5039 ptr_idxs = recv_buffer_idxs; 5040 for (i=0;i<n_recvs;i++) { 5041 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5042 new_local_type_private = MATAIJ_PRIVATE; 5043 break; 5044 } 5045 ptr_idxs += olengths_idxs[i]; 5046 } 5047 switch (new_local_type_private) { 5048 case MATDENSE_PRIVATE: 5049 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5050 new_local_type = MATSEQAIJ; 5051 bs = 1; 5052 } else { /* if I receive only 1 dense matrix */ 5053 new_local_type = MATSEQDENSE; 5054 bs = 1; 5055 } 5056 break; 5057 case MATAIJ_PRIVATE: 5058 new_local_type = MATSEQAIJ; 5059 bs = 1; 5060 break; 5061 case MATBAIJ_PRIVATE: 5062 new_local_type = MATSEQBAIJ; 5063 break; 5064 case MATSBAIJ_PRIVATE: 5065 new_local_type = MATSEQSBAIJ; 5066 break; 5067 default: 5068 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5069 break; 5070 } 5071 } else { /* by default, new_local_type is seqdense */ 5072 new_local_type = MATSEQDENSE; 5073 bs = 1; 5074 } 5075 5076 /* create MATIS object if needed */ 5077 if (reuse == MAT_INITIAL_MATRIX) { 5078 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5079 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5080 } else { 5081 /* it also destroys the local matrices */ 5082 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5083 } 5084 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5085 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5086 5087 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5088 5089 /* Global to local map of received indices */ 5090 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5091 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5092 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5093 5094 /* restore attributes -> type of incoming data and its size */ 5095 buf_size_idxs = 0; 5096 for (i=0;i<n_recvs;i++) { 5097 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5098 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5099 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5100 } 5101 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5102 5103 /* set preallocation */ 5104 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5105 if (!newisdense) { 5106 PetscInt *new_local_nnz=0; 5107 5108 ptr_vals = recv_buffer_vals; 5109 ptr_idxs = recv_buffer_idxs_local; 5110 if (n_recvs) { 5111 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5112 } 5113 for (i=0;i<n_recvs;i++) { 5114 PetscInt j; 5115 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5116 for (j=0;j<*(ptr_idxs+1);j++) { 5117 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5118 } 5119 } else { 5120 /* TODO */ 5121 } 5122 ptr_idxs += olengths_idxs[i]; 5123 } 5124 if (new_local_nnz) { 5125 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5126 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5127 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5128 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5129 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5130 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5131 } else { 5132 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5133 } 5134 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5135 } else { 5136 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5137 } 5138 5139 /* set values */ 5140 ptr_vals = recv_buffer_vals; 5141 ptr_idxs = recv_buffer_idxs_local; 5142 for (i=0;i<n_recvs;i++) { 5143 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5144 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5145 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5146 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5147 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5148 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5149 } else { 5150 /* TODO */ 5151 } 5152 ptr_idxs += olengths_idxs[i]; 5153 ptr_vals += olengths_vals[i]; 5154 } 5155 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5156 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5157 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5158 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5159 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5160 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5161 5162 #if 0 5163 if (!restrict_comm) { /* check */ 5164 Vec lvec,rvec; 5165 PetscReal infty_error; 5166 5167 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5168 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5169 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5170 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5171 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5172 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5173 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5174 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5175 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5176 } 5177 #endif 5178 5179 /* assemble new additional is (if any) */ 5180 if (nis) { 5181 PetscInt **temp_idxs,*count_is,j,psum; 5182 5183 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5184 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5185 ptr_idxs = recv_buffer_idxs_is; 5186 psum = 0; 5187 for (i=0;i<n_recvs;i++) { 5188 for (j=0;j<nis;j++) { 5189 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5190 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5191 psum += plen; 5192 ptr_idxs += plen+1; /* shift pointer to received data */ 5193 } 5194 } 5195 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5196 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5197 for (i=1;i<nis;i++) { 5198 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5199 } 5200 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5201 ptr_idxs = recv_buffer_idxs_is; 5202 for (i=0;i<n_recvs;i++) { 5203 for (j=0;j<nis;j++) { 5204 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5205 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5206 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5207 ptr_idxs += plen+1; /* shift pointer to received data */ 5208 } 5209 } 5210 for (i=0;i<nis;i++) { 5211 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5212 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5213 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5214 } 5215 ierr = PetscFree(count_is);CHKERRQ(ierr); 5216 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5217 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5218 } 5219 /* free workspace */ 5220 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5221 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5222 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5223 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5224 if (isdense) { 5225 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5226 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5227 } else { 5228 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5229 } 5230 if (nis) { 5231 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5232 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5233 } 5234 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5235 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5236 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5237 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5238 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5239 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5240 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5241 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5242 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5243 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5244 ierr = PetscFree(onodes);CHKERRQ(ierr); 5245 if (nis) { 5246 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5247 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5248 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5249 } 5250 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5251 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5252 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5253 for (i=0;i<nis;i++) { 5254 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5255 } 5256 *mat_n = NULL; 5257 } 5258 PetscFunctionReturn(0); 5259 } 5260 5261 /* temporary hack into ksp private data structure */ 5262 #include <petsc/private/kspimpl.h> 5263 5264 #undef __FUNCT__ 5265 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5266 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5267 { 5268 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5269 PC_IS *pcis = (PC_IS*)pc->data; 5270 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5271 MatNullSpace CoarseNullSpace=NULL; 5272 ISLocalToGlobalMapping coarse_islg; 5273 IS coarse_is,*isarray; 5274 PetscInt i,im_active=-1,active_procs=-1; 5275 PetscInt nis,nisdofs,nisneu,nisvert; 5276 PC pc_temp; 5277 PCType coarse_pc_type; 5278 KSPType coarse_ksp_type; 5279 PetscBool multilevel_requested,multilevel_allowed; 5280 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5281 Mat t_coarse_mat_is; 5282 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5283 PetscMPIInt all_procs; 5284 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5285 PetscBool compute_vecs = PETSC_FALSE; 5286 PetscScalar *array; 5287 PetscErrorCode ierr; 5288 5289 PetscFunctionBegin; 5290 /* Assign global numbering to coarse dofs */ 5291 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5292 PetscInt ocoarse_size; 5293 compute_vecs = PETSC_TRUE; 5294 ocoarse_size = pcbddc->coarse_size; 5295 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5296 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5297 /* see if we can avoid some work */ 5298 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5299 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5300 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5301 PC pc; 5302 PetscBool isbddc; 5303 5304 /* temporary workaround since PCBDDC does not have a reset method so far */ 5305 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5306 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5307 if (isbddc) { 5308 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5309 } else { 5310 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5311 } 5312 coarse_reuse = PETSC_FALSE; 5313 } else { /* we can safely reuse already computed coarse matrix */ 5314 coarse_reuse = PETSC_TRUE; 5315 } 5316 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5317 coarse_reuse = PETSC_FALSE; 5318 } 5319 /* reset any subassembling information */ 5320 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5321 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5322 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5323 coarse_reuse = PETSC_TRUE; 5324 } 5325 5326 /* count "active" (i.e. with positive local size) and "void" processes */ 5327 im_active = !!(pcis->n); 5328 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5329 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5330 void_procs = all_procs-active_procs; 5331 csin_type_simple = PETSC_TRUE; 5332 redist = PETSC_FALSE; 5333 if (pcbddc->current_level && void_procs) { 5334 csin_ml = PETSC_TRUE; 5335 ncoarse_ml = void_procs; 5336 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5337 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5338 csin_ds = PETSC_TRUE; 5339 ncoarse_ds = pcbddc->redistribute_coarse; 5340 redist = PETSC_TRUE; 5341 } else { 5342 csin_ds = PETSC_TRUE; 5343 ncoarse_ds = active_procs; 5344 redist = PETSC_TRUE; 5345 } 5346 } else { 5347 csin_ml = PETSC_FALSE; 5348 ncoarse_ml = all_procs; 5349 if (void_procs) { 5350 csin_ds = PETSC_TRUE; 5351 ncoarse_ds = void_procs; 5352 csin_type_simple = PETSC_FALSE; 5353 } else { 5354 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5355 csin_ds = PETSC_TRUE; 5356 ncoarse_ds = pcbddc->redistribute_coarse; 5357 redist = PETSC_TRUE; 5358 } else { 5359 csin_ds = PETSC_FALSE; 5360 ncoarse_ds = all_procs; 5361 } 5362 } 5363 } 5364 5365 /* 5366 test if we can go multilevel: three conditions must be satisfied: 5367 - we have not exceeded the number of levels requested 5368 - we can actually subassemble the active processes 5369 - we can find a suitable number of MPI processes where we can place the subassembled problem 5370 */ 5371 multilevel_allowed = PETSC_FALSE; 5372 multilevel_requested = PETSC_FALSE; 5373 if (pcbddc->current_level < pcbddc->max_levels) { 5374 multilevel_requested = PETSC_TRUE; 5375 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5376 multilevel_allowed = PETSC_FALSE; 5377 } else { 5378 multilevel_allowed = PETSC_TRUE; 5379 } 5380 } 5381 /* determine number of process partecipating to coarse solver */ 5382 if (multilevel_allowed) { 5383 ncoarse = ncoarse_ml; 5384 csin = csin_ml; 5385 redist = PETSC_FALSE; 5386 } else { 5387 ncoarse = ncoarse_ds; 5388 csin = csin_ds; 5389 } 5390 5391 /* creates temporary l2gmap and IS for coarse indexes */ 5392 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5393 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5394 5395 /* creates temporary MATIS object for coarse matrix */ 5396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5397 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5398 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5399 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5400 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5401 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5402 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5403 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5404 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5405 5406 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5407 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5408 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5409 const PetscInt *idxs; 5410 ISLocalToGlobalMapping tmap; 5411 5412 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5413 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5414 /* allocate space for temporary storage */ 5415 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5416 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5417 /* allocate for IS array */ 5418 nisdofs = pcbddc->n_ISForDofsLocal; 5419 nisneu = !!pcbddc->NeumannBoundariesLocal; 5420 nisvert = 0; 5421 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5422 nisvert = 1; 5423 } 5424 nis = nisdofs + nisneu + nisvert; 5425 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5426 /* dofs splitting */ 5427 for (i=0;i<nisdofs;i++) { 5428 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5429 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5430 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5431 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5432 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5433 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5434 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5435 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5436 } 5437 /* neumann boundaries */ 5438 if (pcbddc->NeumannBoundariesLocal) { 5439 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5440 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5441 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5442 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5443 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5444 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5445 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5446 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5447 } 5448 /* primal vertices (benign) */ 5449 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5450 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5451 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5452 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5453 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5454 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5455 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5456 } 5457 /* free memory */ 5458 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5459 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5460 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5461 } else { 5462 nis = 0; 5463 nisdofs = 0; 5464 nisneu = 0; 5465 nisvert = 0; 5466 isarray = NULL; 5467 } 5468 /* destroy no longer needed map */ 5469 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5470 5471 /* restrict on coarse candidates (if needed) */ 5472 coarse_mat_is = NULL; 5473 if (csin) { 5474 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5475 if (redist) { 5476 PetscMPIInt rank; 5477 PetscInt spc,n_spc_p1,dest[1],destsize; 5478 5479 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5480 spc = active_procs/ncoarse; 5481 n_spc_p1 = active_procs%ncoarse; 5482 if (im_active) { 5483 destsize = 1; 5484 if (rank > n_spc_p1*(spc+1)-1) { 5485 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5486 } else { 5487 dest[0] = rank/(spc+1); 5488 } 5489 } else { 5490 destsize = 0; 5491 } 5492 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5493 } else if (csin_type_simple) { 5494 PetscMPIInt rank; 5495 PetscInt issize,isidx; 5496 5497 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5498 if (im_active) { 5499 issize = 1; 5500 isidx = (PetscInt)rank; 5501 } else { 5502 issize = 0; 5503 isidx = -1; 5504 } 5505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5506 } else { /* get a suitable subassembling pattern from MATIS code */ 5507 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5508 } 5509 5510 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5511 if (!redist || ncoarse <= void_procs) { 5512 PetscInt ncoarse_cand,tissize,*nisindices; 5513 PetscInt *coarse_candidates; 5514 const PetscInt* tisindices; 5515 5516 /* get coarse candidates' ranks in pc communicator */ 5517 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5518 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5519 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5520 if (!coarse_candidates[i]) { 5521 coarse_candidates[ncoarse_cand++]=i; 5522 } 5523 } 5524 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5525 5526 5527 if (pcbddc->dbg_flag) { 5528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5530 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5531 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5532 for (i=0;i<ncoarse_cand;i++) { 5533 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5534 } 5535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5537 } 5538 /* shift the pattern on coarse candidates */ 5539 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5540 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5541 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5542 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5543 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5544 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5545 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5546 } 5547 if (pcbddc->dbg_flag) { 5548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5550 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5551 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5552 } 5553 } 5554 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5555 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5556 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5557 } else { /* this is the last level, so use just receiving processes in subcomm */ 5558 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5559 } 5560 } else { 5561 if (pcbddc->dbg_flag) { 5562 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5564 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5565 } 5566 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5567 coarse_mat_is = t_coarse_mat_is; 5568 } 5569 5570 /* create local to global scatters for coarse problem */ 5571 if (compute_vecs) { 5572 PetscInt lrows; 5573 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5574 if (coarse_mat_is) { 5575 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5576 } else { 5577 lrows = 0; 5578 } 5579 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5580 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5581 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5582 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5583 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5584 } 5585 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5586 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5587 5588 /* set defaults for coarse KSP and PC */ 5589 if (multilevel_allowed) { 5590 coarse_ksp_type = KSPRICHARDSON; 5591 coarse_pc_type = PCBDDC; 5592 } else { 5593 coarse_ksp_type = KSPPREONLY; 5594 coarse_pc_type = PCREDUNDANT; 5595 } 5596 5597 /* print some info if requested */ 5598 if (pcbddc->dbg_flag) { 5599 if (!multilevel_allowed) { 5600 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5601 if (multilevel_requested) { 5602 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5603 } else if (pcbddc->max_levels) { 5604 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5605 } 5606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5607 } 5608 } 5609 5610 /* create the coarse KSP object only once with defaults */ 5611 if (coarse_mat_is) { 5612 MatReuse coarse_mat_reuse; 5613 PetscViewer dbg_viewer = NULL; 5614 if (pcbddc->dbg_flag) { 5615 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5616 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5617 } 5618 if (!pcbddc->coarse_ksp) { 5619 char prefix[256],str_level[16]; 5620 size_t len; 5621 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5622 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5623 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5624 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5625 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5626 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5627 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5628 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5629 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5630 /* prefix */ 5631 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5632 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5633 if (!pcbddc->current_level) { 5634 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5635 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5636 } else { 5637 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5638 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5639 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5640 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5641 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5642 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5643 } 5644 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5645 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5646 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5647 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5648 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5649 /* allow user customization */ 5650 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5651 } 5652 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5653 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5654 if (nisdofs) { 5655 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5656 for (i=0;i<nisdofs;i++) { 5657 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5658 } 5659 } 5660 if (nisneu) { 5661 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5662 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5663 } 5664 if (nisvert) { 5665 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5666 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5667 } 5668 5669 /* get some info after set from options */ 5670 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5671 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5672 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5673 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5674 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5675 isbddc = PETSC_FALSE; 5676 } 5677 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5678 if (isredundant) { 5679 KSP inner_ksp; 5680 PC inner_pc; 5681 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5682 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5683 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5684 } 5685 5686 /* assemble coarse matrix */ 5687 if (coarse_reuse) { 5688 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5689 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5690 coarse_mat_reuse = MAT_REUSE_MATRIX; 5691 } else { 5692 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5693 } 5694 if (isbddc || isnn) { 5695 if (isbddc) { /* currently there are no APIs for these options */ 5696 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5697 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5698 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5699 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5700 if (pcbddc_coarse->benign_compute_nonetflux) { 5701 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5702 } 5703 } 5704 if (pcbddc->coarsening_ratio > 1) { 5705 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5706 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5707 if (pcbddc->dbg_flag) { 5708 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5709 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5710 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5711 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5712 } 5713 } 5714 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5715 } else { 5716 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5717 coarse_mat = coarse_mat_is; 5718 } 5719 } else { 5720 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5721 } 5722 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5723 5724 /* propagate symmetry info of coarse matrix */ 5725 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5726 if (pc->pmat->symmetric_set) { 5727 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5728 } 5729 if (pc->pmat->hermitian_set) { 5730 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5731 } 5732 if (pc->pmat->spd_set) { 5733 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5734 } 5735 /* set operators */ 5736 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5737 if (pcbddc->dbg_flag) { 5738 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5739 } 5740 } else { /* processes non partecipating to coarse solver (if any) */ 5741 coarse_mat = 0; 5742 } 5743 ierr = PetscFree(isarray);CHKERRQ(ierr); 5744 #if 0 5745 { 5746 PetscViewer viewer; 5747 char filename[256]; 5748 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5749 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5750 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5751 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5752 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5753 } 5754 #endif 5755 5756 /* Compute coarse null space (special handling by BDDC only) */ 5757 #if 0 5758 if (pcbddc->NullSpace) { 5759 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5760 } 5761 #endif 5762 /* hack */ 5763 if (pcbddc->coarse_ksp) { 5764 Vec crhs,csol; 5765 5766 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5767 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5768 if (!csol) { 5769 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5770 } 5771 if (!crhs) { 5772 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5773 } 5774 } 5775 5776 /* compute null space for coarse solver if the benign trick has been requested */ 5777 if (pcbddc->benign_null) { 5778 5779 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5780 for (i=0;i<pcbddc->benign_n;i++) { 5781 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5782 } 5783 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5784 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5785 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5787 if (coarse_mat) { 5788 Vec nullv; 5789 PetscScalar *array,*array2; 5790 PetscInt nl; 5791 5792 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5793 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5794 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5795 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5796 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5797 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5798 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5799 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5800 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5801 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5802 } 5803 } 5804 5805 if (pcbddc->coarse_ksp) { 5806 PetscBool ispreonly; 5807 5808 if (CoarseNullSpace) { 5809 PetscBool isnull; 5810 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5811 if (0) { 5812 if (isbddc && !pcbddc->benign_saddle_point) { 5813 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5814 } else { 5815 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5816 } 5817 } else { 5818 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5819 } 5820 } 5821 /* setup coarse ksp */ 5822 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5823 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5824 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5825 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5826 KSP check_ksp; 5827 KSPType check_ksp_type; 5828 PC check_pc; 5829 Vec check_vec,coarse_vec; 5830 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5831 PetscInt its; 5832 PetscBool compute_eigs; 5833 PetscReal *eigs_r,*eigs_c; 5834 PetscInt neigs; 5835 const char *prefix; 5836 5837 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5838 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5839 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5840 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5841 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5842 if (ispreonly) { 5843 check_ksp_type = KSPPREONLY; 5844 compute_eigs = PETSC_FALSE; 5845 } else { 5846 check_ksp_type = KSPGMRES; 5847 compute_eigs = PETSC_TRUE; 5848 } 5849 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5850 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5851 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5852 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5853 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5854 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5855 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5856 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5857 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5858 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5859 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5860 /* create random vec */ 5861 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5862 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5863 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5864 if (CoarseNullSpace) { 5865 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5866 } 5867 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5868 /* solve coarse problem */ 5869 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5870 if (CoarseNullSpace) { 5871 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5872 } 5873 /* set eigenvalue estimation if preonly has not been requested */ 5874 if (compute_eigs) { 5875 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5876 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5877 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5878 lambda_max = eigs_r[neigs-1]; 5879 lambda_min = eigs_r[0]; 5880 if (pcbddc->use_coarse_estimates) { 5881 if (lambda_max>lambda_min) { 5882 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5883 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5884 } 5885 } 5886 } 5887 5888 /* check coarse problem residual error */ 5889 if (pcbddc->dbg_flag) { 5890 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5891 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5892 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5893 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5894 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5895 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5896 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5897 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5898 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5899 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5900 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5901 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5902 if (CoarseNullSpace) { 5903 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5904 } 5905 if (compute_eigs) { 5906 PetscReal lambda_max_s,lambda_min_s; 5907 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5908 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5909 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5910 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5911 for (i=0;i<neigs;i++) { 5912 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5913 } 5914 } 5915 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5916 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5917 } 5918 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5919 if (compute_eigs) { 5920 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5921 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5922 } 5923 } 5924 } 5925 /* print additional info */ 5926 if (pcbddc->dbg_flag) { 5927 /* waits until all processes reaches this point */ 5928 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5929 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5930 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5931 } 5932 5933 /* free memory */ 5934 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5935 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5936 PetscFunctionReturn(0); 5937 } 5938 5939 #undef __FUNCT__ 5940 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5941 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5942 { 5943 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5944 PC_IS* pcis = (PC_IS*)pc->data; 5945 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5946 IS subset,subset_mult,subset_n; 5947 PetscInt local_size,coarse_size=0; 5948 PetscInt *local_primal_indices=NULL; 5949 const PetscInt *t_local_primal_indices; 5950 PetscErrorCode ierr; 5951 5952 PetscFunctionBegin; 5953 /* Compute global number of coarse dofs */ 5954 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5955 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5956 } 5957 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5958 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5959 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5960 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5961 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5962 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5963 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5964 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5965 if (local_size != pcbddc->local_primal_size) { 5966 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5967 } 5968 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5969 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5970 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5971 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5972 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5973 5974 /* check numbering */ 5975 if (pcbddc->dbg_flag) { 5976 PetscScalar coarsesum,*array,*array2; 5977 PetscInt i; 5978 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5979 5980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5983 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5984 /* counter */ 5985 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5986 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5987 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5988 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5989 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5990 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5991 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5992 for (i=0;i<pcbddc->local_primal_size;i++) { 5993 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5994 } 5995 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5996 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5997 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5998 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5999 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6000 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6001 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6002 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6003 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6004 for (i=0;i<pcis->n;i++) { 6005 if (array[i] != 0.0 && array[i] != array2[i]) { 6006 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6007 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6008 set_error = PETSC_TRUE; 6009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6010 } 6011 } 6012 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6013 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6014 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6015 for (i=0;i<pcis->n;i++) { 6016 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6017 } 6018 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6019 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6020 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6021 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6022 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6024 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6025 PetscInt *gidxs; 6026 6027 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6028 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6032 for (i=0;i<pcbddc->local_primal_size;i++) { 6033 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6034 } 6035 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6036 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6037 } 6038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6039 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6040 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6041 } 6042 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6043 /* get back data */ 6044 *coarse_size_n = coarse_size; 6045 *local_primal_indices_n = local_primal_indices; 6046 PetscFunctionReturn(0); 6047 } 6048 6049 #undef __FUNCT__ 6050 #define __FUNCT__ "PCBDDCGlobalToLocal" 6051 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6052 { 6053 IS localis_t; 6054 PetscInt i,lsize,*idxs,n; 6055 PetscScalar *vals; 6056 PetscErrorCode ierr; 6057 6058 PetscFunctionBegin; 6059 /* get indices in local ordering exploiting local to global map */ 6060 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6061 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6062 for (i=0;i<lsize;i++) vals[i] = 1.0; 6063 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6064 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6065 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6066 if (idxs) { /* multilevel guard */ 6067 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6068 } 6069 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6070 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6071 ierr = PetscFree(vals);CHKERRQ(ierr); 6072 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6073 /* now compute set in local ordering */ 6074 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6075 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6076 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6077 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6078 for (i=0,lsize=0;i<n;i++) { 6079 if (PetscRealPart(vals[i]) > 0.5) { 6080 lsize++; 6081 } 6082 } 6083 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6084 for (i=0,lsize=0;i<n;i++) { 6085 if (PetscRealPart(vals[i]) > 0.5) { 6086 idxs[lsize++] = i; 6087 } 6088 } 6089 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6090 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6091 *localis = localis_t; 6092 PetscFunctionReturn(0); 6093 } 6094 6095 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6096 #undef __FUNCT__ 6097 #define __FUNCT__ "PCBDDCMatMult_Private" 6098 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6099 { 6100 PCBDDCChange_ctx change_ctx; 6101 PetscErrorCode ierr; 6102 6103 PetscFunctionBegin; 6104 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6105 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6106 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6107 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6108 PetscFunctionReturn(0); 6109 } 6110 6111 #undef __FUNCT__ 6112 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6113 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6114 { 6115 PCBDDCChange_ctx change_ctx; 6116 PetscErrorCode ierr; 6117 6118 PetscFunctionBegin; 6119 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6120 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6121 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6122 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6123 PetscFunctionReturn(0); 6124 } 6125 6126 #undef __FUNCT__ 6127 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6128 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6129 { 6130 PC_IS *pcis=(PC_IS*)pc->data; 6131 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6132 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6133 Mat S_j; 6134 PetscInt *used_xadj,*used_adjncy; 6135 PetscBool free_used_adj; 6136 PetscErrorCode ierr; 6137 6138 PetscFunctionBegin; 6139 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6140 free_used_adj = PETSC_FALSE; 6141 if (pcbddc->sub_schurs_layers == -1) { 6142 used_xadj = NULL; 6143 used_adjncy = NULL; 6144 } else { 6145 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6146 used_xadj = pcbddc->mat_graph->xadj; 6147 used_adjncy = pcbddc->mat_graph->adjncy; 6148 } else if (pcbddc->computed_rowadj) { 6149 used_xadj = pcbddc->mat_graph->xadj; 6150 used_adjncy = pcbddc->mat_graph->adjncy; 6151 } else { 6152 PetscBool flg_row=PETSC_FALSE; 6153 const PetscInt *xadj,*adjncy; 6154 PetscInt nvtxs; 6155 6156 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6157 if (flg_row) { 6158 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6159 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6160 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6161 free_used_adj = PETSC_TRUE; 6162 } else { 6163 pcbddc->sub_schurs_layers = -1; 6164 used_xadj = NULL; 6165 used_adjncy = NULL; 6166 } 6167 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6168 } 6169 } 6170 6171 /* setup sub_schurs data */ 6172 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6173 if (!sub_schurs->schur_explicit) { 6174 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6175 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6176 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6177 } else { 6178 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6179 PetscBool isseqaij; 6180 PetscInt benign_n; 6181 Mat change = NULL; 6182 IS change_primal = NULL; 6183 6184 if (!pcbddc->use_vertices && reuse_solvers) { 6185 PetscInt n_vertices; 6186 6187 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6188 reuse_solvers = (PetscBool)!n_vertices; 6189 } 6190 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6191 if (!isseqaij) { 6192 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6193 if (matis->A == pcbddc->local_mat) { 6194 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6195 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6196 } else { 6197 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6198 } 6199 } 6200 if (!pcbddc->benign_change_explicit) { 6201 benign_n = pcbddc->benign_n; 6202 } else { 6203 benign_n = 0; 6204 } 6205 /* If the user defines additional constraints, we import them here. 6206 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6207 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6208 PC_IS *pcisf; 6209 PC_BDDC *pcbddcf; 6210 PC pcf; 6211 6212 if (pcbddc->sub_schurs_rebuild) { 6213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6214 } 6215 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6216 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6217 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6218 /* hacks */ 6219 pcisf = (PC_IS*)pcf->data; 6220 pcisf->is_B_local = pcis->is_B_local; 6221 pcisf->vec1_N = pcis->vec1_N; 6222 pcisf->BtoNmap = pcis->BtoNmap; 6223 pcisf->n = pcis->n; 6224 pcisf->n_B = pcis->n_B; 6225 pcbddcf = (PC_BDDC*)pcf->data; 6226 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6227 pcbddcf->mat_graph = pcbddc->mat_graph; 6228 pcbddcf->use_faces = PETSC_TRUE; 6229 pcbddcf->use_change_of_basis = PETSC_TRUE; 6230 pcbddcf->use_change_on_faces = PETSC_TRUE; 6231 pcbddcf->use_qr_single = PETSC_TRUE; 6232 pcbddcf->fake_change = PETSC_TRUE; 6233 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6234 /* store information on primal vertices and change of basis (in local numbering) */ 6235 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6236 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6237 change = pcbddcf->ConstraintMatrix; 6238 pcbddcf->ConstraintMatrix = NULL; 6239 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6240 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6241 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6242 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6243 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6244 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6245 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6246 pcf->ops->destroy = NULL; 6247 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6248 } 6249 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6250 ierr = MatDestroy(&change);CHKERRQ(ierr); 6251 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6252 } 6253 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6254 6255 /* free adjacency */ 6256 if (free_used_adj) { 6257 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6258 } 6259 PetscFunctionReturn(0); 6260 } 6261 6262 #undef __FUNCT__ 6263 #define __FUNCT__ "PCBDDCInitSubSchurs" 6264 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6265 { 6266 PC_IS *pcis=(PC_IS*)pc->data; 6267 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6268 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6269 PCBDDCGraph graph; 6270 PetscErrorCode ierr; 6271 6272 PetscFunctionBegin; 6273 /* attach interface graph for determining subsets */ 6274 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6275 IS verticesIS,verticescomm; 6276 PetscInt vsize,*idxs; 6277 6278 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6279 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6280 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6281 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6282 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6283 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6284 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6285 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6286 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6287 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6288 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6289 /* 6290 if (pcbddc->dbg_flag) { 6291 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6292 } 6293 */ 6294 } else { 6295 graph = pcbddc->mat_graph; 6296 } 6297 6298 /* sub_schurs init */ 6299 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6300 6301 /* free graph struct */ 6302 if (pcbddc->sub_schurs_rebuild) { 6303 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6304 } 6305 PetscFunctionReturn(0); 6306 } 6307 6308 #undef __FUNCT__ 6309 #define __FUNCT__ "PCBDDCCheckOperator" 6310 PetscErrorCode PCBDDCCheckOperator(PC pc) 6311 { 6312 PC_IS *pcis=(PC_IS*)pc->data; 6313 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6314 PetscErrorCode ierr; 6315 6316 PetscFunctionBegin; 6317 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6318 IS zerodiag = NULL; 6319 Mat S_j,B0_B=NULL; 6320 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6321 PetscScalar *p0_check,*array,*array2; 6322 PetscReal norm; 6323 PetscInt i; 6324 6325 /* B0 and B0_B */ 6326 if (zerodiag) { 6327 IS dummy; 6328 6329 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6330 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6331 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6332 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6333 } 6334 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6335 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6336 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6337 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6338 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6339 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6341 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6342 /* S_j */ 6343 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6344 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6345 6346 /* mimic vector in \widetilde{W}_\Gamma */ 6347 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6348 /* continuous in primal space */ 6349 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6350 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6351 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6352 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6353 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6354 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6355 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6356 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6357 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6358 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6359 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6360 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6361 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6362 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6363 6364 /* assemble rhs for coarse problem */ 6365 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6366 /* local with Schur */ 6367 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6368 if (zerodiag) { 6369 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6370 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6371 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6372 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6373 } 6374 /* sum on primal nodes the local contributions */ 6375 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6376 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6377 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6378 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6379 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6380 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6381 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6382 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6383 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6384 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6385 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6386 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6387 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6388 /* scale primal nodes (BDDC sums contibutions) */ 6389 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6390 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6391 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6392 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6393 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6394 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6395 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6396 /* global: \widetilde{B0}_B w_\Gamma */ 6397 if (zerodiag) { 6398 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6399 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6400 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6401 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6402 } 6403 /* BDDC */ 6404 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6405 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6406 6407 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6408 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6409 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6410 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6411 for (i=0;i<pcbddc->benign_n;i++) { 6412 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6413 } 6414 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6415 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6416 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6417 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6418 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6419 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6420 } 6421 PetscFunctionReturn(0); 6422 } 6423