1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 /* no-net-flux */ 650 if (pcbddc->benign_compute_nonetflux) { 651 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 652 MatNullSpace near_null_space; 653 ISLocalToGlobalMapping rmap; 654 Vec quad_vec; 655 PetscScalar *pvals; 656 PetscInt i,np,*dummyins; 657 658 /* create vector to hold quadrature weights */ 659 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 660 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 661 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 662 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 663 664 /* compute B^{(i)T} * 1_p */ 665 np = 0; 666 if (zerodiag) { 667 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 668 } else if (pressures) { 669 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 670 } 671 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 672 for (i=0;i<np;i++) pvals[i] = 1.; 673 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 674 if (np) { 675 const PetscInt *pidxs; 676 677 if (zerodiag) { 678 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 679 } else if (pressures) { 680 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 681 } 682 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 683 if (zerodiag) { 684 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 685 } else if (pressures) { 686 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 687 } 688 } 689 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 690 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 691 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 692 ierr = PetscFree(pvals);CHKERRQ(ierr); 693 694 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 695 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 696 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 697 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 698 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 699 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 700 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 701 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 702 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 703 for (i=0;i<matis->sf_nleaves;i++) { 704 if (PetscGlobalRank != matis->sf_leafdata[i]) { 705 pvals[i] = 0.; 706 } 707 } 708 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 709 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 710 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 711 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 712 ierr = PetscFree(dummyins);CHKERRQ(ierr); 713 714 /* assembly quadrature vec and attach near null space to pmat */ 715 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 716 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 717 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 718 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 719 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 720 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 721 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 722 } 723 724 /* change of basis and p0 dofs */ 725 if (has_null_pressures) { 726 IS zerodiagc; 727 const PetscInt *idxs,*idxsc; 728 PetscInt i,s,*nnz; 729 730 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 731 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 732 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 733 /* local change of basis for pressures */ 734 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 735 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 736 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 737 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 738 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 739 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 740 for (i=0;i<pcbddc->benign_n;i++) { 741 PetscInt nzs,j; 742 743 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 744 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 745 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 746 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 747 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 748 } 749 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 750 ierr = PetscFree(nnz);CHKERRQ(ierr); 751 /* set identity on velocities */ 752 for (i=0;i<n-nz;i++) { 753 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 754 } 755 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 756 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 757 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 758 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 759 /* set change on pressures */ 760 for (s=0;s<pcbddc->benign_n;s++) { 761 PetscScalar *array; 762 PetscInt nzs; 763 764 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 765 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 766 for (i=0;i<nzs-1;i++) { 767 PetscScalar vals[2]; 768 PetscInt cols[2]; 769 770 cols[0] = idxs[i]; 771 cols[1] = idxs[nzs-1]; 772 vals[0] = 1.; 773 vals[1] = 1.; 774 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 775 } 776 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 777 for (i=0;i<nzs-1;i++) array[i] = -1.; 778 array[nzs-1] = 1.; 779 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 780 /* store local idxs for p0 */ 781 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 782 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 783 ierr = PetscFree(array);CHKERRQ(ierr); 784 } 785 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 /* project if needed */ 788 if (pcbddc->benign_change_explicit) { 789 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 791 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 793 } 794 /* store global idxs for p0 */ 795 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 796 } 797 pcbddc->benign_zerodiag_subs = zerodiag_subs; 798 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 799 800 /* determines if the coarse solver will be singular or not */ 801 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 802 *zerodiaglocal = zerodiag; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 808 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 809 { 810 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 if (!pcbddc->benign_sf) { 815 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 816 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 817 } 818 if (get) { /* use SF to get values */ 819 PetscScalar *array; 820 821 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 822 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 823 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 824 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 825 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 826 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 828 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 835 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 836 { 837 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* TODO: add error checking 842 - avoid nested pop (or push) calls. 843 - cannot push before pop. 844 - cannot call this if pcbddc->local_mat is NULL 845 */ 846 if (!pcbddc->benign_n) { 847 PetscFunctionReturn(0); 848 } 849 if (pop) { 850 if (pcbddc->benign_change_explicit) { 851 IS is_p0; 852 MatReuse reuse; 853 854 /* extract B_0 */ 855 reuse = MAT_INITIAL_MATRIX; 856 if (pcbddc->benign_B0) { 857 reuse = MAT_REUSE_MATRIX; 858 } 859 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 861 /* remove rows and cols from local problem */ 862 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 863 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 864 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 866 } else { 867 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 868 PetscScalar *vals; 869 PetscInt i,n,*idxs_ins; 870 871 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 872 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 873 if (!pcbddc->benign_B0) { 874 PetscInt *nnz; 875 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 876 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 877 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 878 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 879 for (i=0;i<pcbddc->benign_n;i++) { 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 881 nnz[i] = n - nnz[i]; 882 } 883 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 884 ierr = PetscFree(nnz);CHKERRQ(ierr); 885 } 886 887 for (i=0;i<pcbddc->benign_n;i++) { 888 PetscScalar *array; 889 PetscInt *idxs,j,nz,cum; 890 891 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 892 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 893 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 for (j=0;j<nz;j++) vals[j] = 1.; 895 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 896 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 897 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 898 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 899 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 900 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 901 cum = 0; 902 for (j=0;j<n;j++) { 903 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 904 vals[cum] = array[j]; 905 idxs_ins[cum] = j; 906 cum++; 907 } 908 } 909 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 916 } 917 } else { /* push */ 918 if (pcbddc->benign_change_explicit) { 919 PetscInt i; 920 921 for (i=0;i<pcbddc->benign_n;i++) { 922 PetscScalar *B0_vals; 923 PetscInt *B0_cols,B0_ncol; 924 925 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 926 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 927 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 928 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 929 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 930 } 931 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 } else { 934 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "PCBDDCAdaptiveSelection" 942 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 943 { 944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 945 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 946 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 947 PetscBLASInt *B_iwork,*B_ifail; 948 PetscScalar *work,lwork; 949 PetscScalar *St,*S,*eigv; 950 PetscScalar *Sarray,*Starray; 951 PetscReal *eigs,thresh; 952 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 953 PetscBool allocated_S_St; 954 #if defined(PETSC_USE_COMPLEX) 955 PetscReal *rwork; 956 #endif 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (!sub_schurs->schur_explicit) { 961 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 962 } 963 964 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 965 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 966 } 967 968 if (pcbddc->dbg_flag) { 969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 971 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 972 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 } 974 975 if (pcbddc->dbg_flag) { 976 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 977 } 978 979 /* max size of subsets */ 980 mss = 0; 981 for (i=0;i<sub_schurs->n_subs;i++) { 982 PetscInt subset_size; 983 984 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 985 mss = PetscMax(mss,subset_size); 986 } 987 988 /* min/max and threshold */ 989 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 990 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 991 nmax = PetscMax(nmin,nmax); 992 allocated_S_St = PETSC_FALSE; 993 if (nmin) { 994 allocated_S_St = PETSC_TRUE; 995 } 996 997 /* allocate lapack workspace */ 998 cum = cum2 = 0; 999 maxneigs = 0; 1000 for (i=0;i<sub_schurs->n_subs;i++) { 1001 PetscInt n,subset_size; 1002 1003 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1004 n = PetscMin(subset_size,nmax); 1005 cum += subset_size; 1006 cum2 += subset_size*n; 1007 maxneigs = PetscMax(maxneigs,n); 1008 } 1009 if (mss) { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_N = mss; 1013 PetscReal zero = 0.0; 1014 PetscReal eps = 0.0; /* dlamch? */ 1015 1016 B_lwork = -1; 1017 S = NULL; 1018 St = NULL; 1019 eigs = NULL; 1020 eigv = NULL; 1021 B_iwork = NULL; 1022 B_ifail = NULL; 1023 #if defined(PETSC_USE_COMPLEX) 1024 rwork = NULL; 1025 #endif 1026 thresh = 1.0; 1027 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1030 #else 1031 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1032 #endif 1033 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1035 } else { 1036 /* TODO */ 1037 } 1038 } else { 1039 lwork = 0; 1040 } 1041 1042 nv = 0; 1043 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1044 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1045 } 1046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1047 if (allocated_S_St) { 1048 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1049 } 1050 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1051 #if defined(PETSC_USE_COMPLEX) 1052 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1055 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1056 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1057 nv+cum,&pcbddc->adaptive_constraints_idxs, 1058 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1059 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1060 1061 maxneigs = 0; 1062 cum = cumarray = 0; 1063 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1064 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1065 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1066 const PetscInt *idxs; 1067 1068 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1069 for (cum=0;cum<nv;cum++) { 1070 pcbddc->adaptive_constraints_n[cum] = 1; 1071 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1072 pcbddc->adaptive_constraints_data[cum] = 1.0; 1073 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1074 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1075 } 1076 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1077 } 1078 1079 if (mss) { /* multilevel */ 1080 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1081 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1082 } 1083 1084 thresh = pcbddc->adaptive_threshold; 1085 for (i=0;i<sub_schurs->n_subs;i++) { 1086 const PetscInt *idxs; 1087 PetscReal infty = PETSC_MAX_REAL; 1088 PetscInt j,subset_size,eigs_start = 0; 1089 PetscBLASInt B_N; 1090 PetscBool same_data = PETSC_FALSE; 1091 1092 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1093 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1095 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1096 if (sub_schurs->is_hermitian) { 1097 PetscInt j,k; 1098 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1099 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 for (j=0;j<subset_size;j++) { 1103 for (k=j;k<subset_size;k++) { 1104 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1105 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1106 } 1107 } 1108 } else { 1109 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1110 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 S = Sarray + cumarray; 1114 St = Starray + cumarray; 1115 } 1116 /* see if we can save some work */ 1117 if (sub_schurs->n_subs == 1) { 1118 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1119 } 1120 1121 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1122 B_neigs = 0; 1123 } else { 1124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1125 PetscBLASInt B_itype = 1; 1126 PetscBLASInt B_IL, B_IU; 1127 PetscReal eps = -1.0; /* dlamch? */ 1128 PetscInt nmin_s; 1129 1130 if (pcbddc->dbg_flag) { 1131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1132 } 1133 1134 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1135 if (thresh > 1.+PETSC_SMALL) { 1136 1137 /* ask for eigenvalues larger than thresh */ 1138 #if defined(PETSC_USE_COMPLEX) 1139 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1140 #else 1141 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1142 #endif 1143 } else { 1144 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1145 B_IL = 1; 1146 #if defined(PETSC_USE_COMPLEX) 1147 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1148 #else 1149 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1150 #endif 1151 } 1152 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1153 if (B_ierr) { 1154 if (B_ierr < 0 ) { 1155 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1156 } else if (B_ierr <= B_N) { 1157 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1158 } else { 1159 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1160 } 1161 } 1162 1163 if (B_neigs > nmax) { 1164 if (pcbddc->dbg_flag) { 1165 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1166 } 1167 eigs_start = B_neigs -nmax; 1168 B_neigs = nmax; 1169 } 1170 1171 nmin_s = PetscMin(nmin,B_N); 1172 if (B_neigs < nmin_s) { 1173 PetscBLASInt B_neigs2; 1174 1175 B_IU = B_N - B_neigs; 1176 B_IL = B_N - nmin_s + 1; 1177 if (pcbddc->dbg_flag) { 1178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1179 } 1180 if (sub_schurs->is_hermitian) { 1181 PetscInt j; 1182 for (j=0;j<subset_size;j++) { 1183 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1184 } 1185 for (j=0;j<subset_size;j++) { 1186 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1187 } 1188 } else { 1189 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1190 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1193 #if defined(PETSC_USE_COMPLEX) 1194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1195 #else 1196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1197 #endif 1198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1199 B_neigs += B_neigs2; 1200 } 1201 if (B_ierr) { 1202 if (B_ierr < 0 ) { 1203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1204 } else if (B_ierr <= B_N) { 1205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1206 } else { 1207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1208 } 1209 } 1210 if (pcbddc->dbg_flag) { 1211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1212 for (j=0;j<B_neigs;j++) { 1213 if (eigs[j] == 0.0) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1217 } 1218 } 1219 } 1220 } else { 1221 /* TODO */ 1222 } 1223 } 1224 /* change the basis back to the original one */ 1225 if (sub_schurs->change) { 1226 Mat change,phi,phit; 1227 1228 if (pcbddc->dbg_flag > 1) { 1229 PetscInt ii; 1230 for (ii=0;ii<B_neigs;ii++) { 1231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1232 for (j=0;j<B_N;j++) { 1233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1234 } 1235 } 1236 } 1237 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1239 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1240 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1241 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1242 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1243 } 1244 maxneigs = PetscMax(B_neigs,maxneigs); 1245 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1246 if (B_neigs) { 1247 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1248 1249 if (pcbddc->dbg_flag > 1) { 1250 PetscInt ii; 1251 for (ii=0;ii<B_neigs;ii++) { 1252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1253 for (j=0;j<B_N;j++) { 1254 #if defined(PETSC_USE_COMPLEX) 1255 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1256 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1258 #else 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1260 #endif 1261 } 1262 } 1263 } 1264 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1265 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1266 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1267 cum++; 1268 } 1269 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1270 /* shift for next computation */ 1271 cumarray += subset_size*subset_size; 1272 } 1273 if (pcbddc->dbg_flag) { 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 } 1276 1277 if (mss) { 1278 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1279 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1280 /* destroy matrices (junk) */ 1281 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1282 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1283 } 1284 if (allocated_S_St) { 1285 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1288 #if defined(PETSC_USE_COMPLEX) 1289 ierr = PetscFree(rwork);CHKERRQ(ierr); 1290 #endif 1291 if (pcbddc->dbg_flag) { 1292 PetscInt maxneigs_r; 1293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "PCBDDCSetUpSolvers" 1301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1302 { 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PetscScalar *coarse_submat_vals; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1311 1312 /* Setup local neumann solver ksp_R */ 1313 /* PCBDDCSetUpLocalScatters should be called first! */ 1314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1315 1316 /* Change global null space passed in by the user if change of basis has been requested */ 1317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1319 } 1320 1321 /* 1322 Setup local correction and local part of coarse basis. 1323 Gives back the dense local part of the coarse matrix in column major ordering 1324 */ 1325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1326 1327 /* Compute total number of coarse nodes and setup coarse solver */ 1328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1329 1330 /* free */ 1331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "PCBDDCResetCustomization" 1337 PetscErrorCode PCBDDCResetCustomization(PC pc) 1338 { 1339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1346 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1348 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1349 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1350 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1351 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1353 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1354 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCResetTopography" 1360 PetscErrorCode PCBDDCResetTopography(PC pc) 1361 { 1362 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1363 PetscInt i; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1368 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1369 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1370 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1371 for (i=0;i<pcbddc->n_local_subs;i++) { 1372 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1375 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "PCBDDCResetSolvers" 1381 PetscErrorCode PCBDDCResetSolvers(PC pc) 1382 { 1383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1388 if (pcbddc->coarse_phi_B) { 1389 PetscScalar *array; 1390 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1391 ierr = PetscFree(array);CHKERRQ(ierr); 1392 } 1393 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1394 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1395 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1396 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1397 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1398 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1402 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1403 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1404 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1405 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1406 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1407 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1408 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1411 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1412 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1413 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1416 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1417 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1418 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1420 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1421 if (pcbddc->benign_zerodiag_subs) { 1422 PetscInt i; 1423 for (i=0;i<pcbddc->benign_n;i++) { 1424 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1434 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1435 { 1436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1437 PC_IS *pcis = (PC_IS*)pc->data; 1438 VecType impVecType; 1439 PetscInt n_constraints,n_R,old_size; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 if (!pcbddc->ConstraintMatrix) { 1444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1445 } 1446 /* get sizes */ 1447 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1448 n_R = pcis->n - pcbddc->n_vertices; 1449 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1450 /* local work vectors (try to avoid unneeded work)*/ 1451 /* R nodes */ 1452 old_size = -1; 1453 if (pcbddc->vec1_R) { 1454 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1455 } 1456 if (n_R != old_size) { 1457 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1458 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1459 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1460 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1461 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1462 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1463 } 1464 /* local primal dofs */ 1465 old_size = -1; 1466 if (pcbddc->vec1_P) { 1467 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1468 } 1469 if (pcbddc->local_primal_size != old_size) { 1470 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1471 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1472 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1474 } 1475 /* local explicit constraints */ 1476 old_size = -1; 1477 if (pcbddc->vec1_C) { 1478 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1479 } 1480 if (n_constraints && n_constraints != old_size) { 1481 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1483 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1484 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "PCBDDCSetUpCorrection" 1491 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1492 { 1493 PetscErrorCode ierr; 1494 /* pointers to pcis and pcbddc */ 1495 PC_IS* pcis = (PC_IS*)pc->data; 1496 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1497 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1498 /* submatrices of local problem */ 1499 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1500 /* submatrices of local coarse problem */ 1501 Mat S_VV,S_CV,S_VC,S_CC; 1502 /* working matrices */ 1503 Mat C_CR; 1504 /* additional working stuff */ 1505 PC pc_R; 1506 Mat F; 1507 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1508 1509 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1510 PetscScalar *work; 1511 PetscInt *idx_V_B; 1512 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1513 PetscInt i,n_R,n_D,n_B; 1514 1515 /* some shortcuts to scalars */ 1516 PetscScalar one=1.0,m_one=-1.0; 1517 1518 PetscFunctionBegin; 1519 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1520 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1521 } 1522 1523 /* Set Non-overlapping dimensions */ 1524 n_vertices = pcbddc->n_vertices; 1525 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1526 n_B = pcis->n_B; 1527 n_D = pcis->n - n_B; 1528 n_R = pcis->n - n_vertices; 1529 1530 /* vertices in boundary numbering */ 1531 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1532 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1533 if (i != n_vertices) { 1534 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1535 } 1536 1537 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1538 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1540 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 1548 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1549 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1553 lda_rhs = n_R; 1554 need_benign_correction = PETSC_FALSE; 1555 if (isLU || isILU || isCHOL) { 1556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1557 } else if (sub_schurs->reuse_solver) { 1558 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1559 MatFactorType type; 1560 1561 F = reuse_solver->F; 1562 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1563 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1564 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1565 need_benign_correction = !!reuse_solver->benign_n; 1566 } else { 1567 F = NULL; 1568 } 1569 1570 /* allocate workspace */ 1571 n = 0; 1572 if (n_constraints) { 1573 n += lda_rhs*n_constraints; 1574 } 1575 if (n_vertices) { 1576 n = PetscMax(2*lda_rhs*n_vertices,n); 1577 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1578 } 1579 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1580 1581 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1582 if (n_constraints) { 1583 Mat M1,M2,M3,C_B; 1584 IS is_aux; 1585 PetscScalar *array,*array2; 1586 1587 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1588 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1589 1590 /* Extract constraints on R nodes: C_{CR} */ 1591 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1592 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1593 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1594 1595 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1596 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1597 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1598 for (i=0;i<n_constraints;i++) { 1599 const PetscScalar *row_cmat_values; 1600 const PetscInt *row_cmat_indices; 1601 PetscInt size_of_constraint,j; 1602 1603 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1604 for (j=0;j<size_of_constraint;j++) { 1605 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1606 } 1607 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1608 } 1609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1610 if (F) { 1611 Mat B; 1612 1613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1614 if (need_benign_correction) { 1615 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1616 1617 /* rhs is already zero on interior dofs, no need to change the rhs */ 1618 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1619 } 1620 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1621 if (need_benign_correction) { 1622 PetscScalar *marr; 1623 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1624 1625 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1626 for (i=0;i<n_constraints;i++) { 1627 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1628 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1630 } 1631 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1632 } 1633 ierr = MatDestroy(&B);CHKERRQ(ierr); 1634 } else { 1635 PetscScalar *marr; 1636 1637 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1638 for (i=0;i<n_constraints;i++) { 1639 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1640 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1641 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1644 } 1645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1646 } 1647 if (!pcbddc->switch_static) { 1648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1649 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1650 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1651 for (i=0;i<n_constraints;i++) { 1652 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1653 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1654 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1658 } 1659 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1660 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1661 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1662 } else { 1663 if (lda_rhs != n_R) { 1664 IS dummy; 1665 1666 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1667 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1668 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1669 } else { 1670 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1671 pcbddc->local_auxmat2 = local_auxmat2_R; 1672 } 1673 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1674 } 1675 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1676 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1677 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1678 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1679 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1680 if (isCHOL) { 1681 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1682 } else { 1683 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1684 } 1685 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1686 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1687 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1688 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1689 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1690 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1691 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1692 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1693 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1694 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1695 } 1696 1697 /* Get submatrices from subdomain matrix */ 1698 if (n_vertices) { 1699 IS is_aux; 1700 1701 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1702 IS tis; 1703 1704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1705 ierr = ISSort(tis);CHKERRQ(ierr); 1706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1708 } else { 1709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1710 } 1711 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1712 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1713 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1714 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1715 } 1716 1717 /* Matrix of coarse basis functions (local) */ 1718 if (pcbddc->coarse_phi_B) { 1719 PetscInt on_B,on_primal,on_D=n_D; 1720 if (pcbddc->coarse_phi_D) { 1721 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1722 } 1723 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1724 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1725 PetscScalar *marray; 1726 1727 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1728 ierr = PetscFree(marray);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1731 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1732 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1733 } 1734 } 1735 1736 if (!pcbddc->coarse_phi_B) { 1737 PetscScalar *marray; 1738 1739 n = n_B*pcbddc->local_primal_size; 1740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1741 n += n_D*pcbddc->local_primal_size; 1742 } 1743 if (!pcbddc->symmetric_primal) { 1744 n *= 2; 1745 } 1746 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1748 n = n_B*pcbddc->local_primal_size; 1749 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1751 n += n_D*pcbddc->local_primal_size; 1752 } 1753 if (!pcbddc->symmetric_primal) { 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1755 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1756 n = n_B*pcbddc->local_primal_size; 1757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1758 } 1759 } else { 1760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1761 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1763 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1764 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1765 } 1766 } 1767 } 1768 1769 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1770 p0_lidx_I = NULL; 1771 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1772 const PetscInt *idxs; 1773 1774 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1775 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1776 for (i=0;i<pcbddc->benign_n;i++) { 1777 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1778 } 1779 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1780 } 1781 1782 /* vertices */ 1783 if (n_vertices) { 1784 1785 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1786 1787 if (n_R) { 1788 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1789 PetscBLASInt B_N,B_one = 1; 1790 PetscScalar *x,*y; 1791 PetscBool isseqaij; 1792 1793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1794 if (need_benign_correction) { 1795 ISLocalToGlobalMapping RtoN; 1796 IS is_p0; 1797 PetscInt *idxs_p0,n; 1798 1799 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1800 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1801 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1802 if (n != pcbddc->benign_n) { 1803 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1804 } 1805 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1806 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1807 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1808 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1809 } 1810 1811 if (lda_rhs == n_R) { 1812 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1813 } else { 1814 PetscScalar *av,*array; 1815 const PetscInt *xadj,*adjncy; 1816 PetscInt n; 1817 PetscBool flg_row; 1818 1819 array = work+lda_rhs*n_vertices; 1820 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1821 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1822 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1823 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1824 for (i=0;i<n;i++) { 1825 PetscInt j; 1826 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1827 } 1828 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1829 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1831 } 1832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1833 if (need_benign_correction) { 1834 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1835 PetscScalar *marr; 1836 1837 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1838 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1839 1840 | 0 0 0 | (V) 1841 L = | 0 0 -1 | (P-p0) 1842 | 0 0 -1 | (p0) 1843 1844 */ 1845 for (i=0;i<reuse_solver->benign_n;i++) { 1846 const PetscScalar *vals; 1847 const PetscInt *idxs,*idxs_zero; 1848 PetscInt n,j,nz; 1849 1850 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1851 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1852 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1853 for (j=0;j<n;j++) { 1854 PetscScalar val = vals[j]; 1855 PetscInt k,col = idxs[j]; 1856 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1857 } 1858 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1859 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1860 } 1861 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1862 } 1863 if (F) { 1864 /* need to correct the rhs */ 1865 if (need_benign_correction) { 1866 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1867 PetscScalar *marr; 1868 1869 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1870 for (i=0;i<n_vertices;i++) { 1871 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1872 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1873 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1874 } 1875 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1876 } 1877 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1878 /* need to correct the solution */ 1879 if (need_benign_correction) { 1880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1881 PetscScalar *marr; 1882 1883 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1884 for (i=0;i<n_vertices;i++) { 1885 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1886 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1887 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1888 } 1889 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1890 } 1891 } else { 1892 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1893 for (i=0;i<n_vertices;i++) { 1894 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1895 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1896 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1897 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1898 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1899 } 1900 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1901 } 1902 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1903 /* S_VV and S_CV */ 1904 if (n_constraints) { 1905 Mat B; 1906 1907 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1908 for (i=0;i<n_vertices;i++) { 1909 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1910 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1911 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1912 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1915 } 1916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1917 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1918 ierr = MatDestroy(&B);CHKERRQ(ierr); 1919 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1920 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1921 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1923 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1924 ierr = MatDestroy(&B);CHKERRQ(ierr); 1925 } 1926 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1927 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1928 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1929 } 1930 if (lda_rhs != n_R) { 1931 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1933 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1934 } 1935 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1936 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1937 if (need_benign_correction) { 1938 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1939 PetscScalar *marr,*sums; 1940 1941 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1942 ierr = MatDenseGetArray(S_VVt,&marr); 1943 for (i=0;i<reuse_solver->benign_n;i++) { 1944 const PetscScalar *vals; 1945 const PetscInt *idxs,*idxs_zero; 1946 PetscInt n,j,nz; 1947 1948 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1949 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1950 for (j=0;j<n_vertices;j++) { 1951 PetscInt k; 1952 sums[j] = 0.; 1953 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1954 } 1955 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1956 for (j=0;j<n;j++) { 1957 PetscScalar val = vals[j]; 1958 PetscInt k; 1959 for (k=0;k<n_vertices;k++) { 1960 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1961 } 1962 } 1963 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1964 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1965 } 1966 ierr = PetscFree(sums);CHKERRQ(ierr); 1967 ierr = MatDenseRestoreArray(S_VVt,&marr); 1968 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1969 } 1970 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1972 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1973 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1974 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1975 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1976 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1977 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1978 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1979 } else { 1980 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1981 } 1982 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1983 1984 /* coarse basis functions */ 1985 for (i=0;i<n_vertices;i++) { 1986 PetscScalar *y; 1987 1988 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1989 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1990 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 y[n_B*i+idx_V_B[i]] = 1.0; 1994 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1996 1997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1998 PetscInt j; 1999 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2005 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2006 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2007 } 2008 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2009 } 2010 /* if n_R == 0 the object is not destroyed */ 2011 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2012 } 2013 2014 if (n_constraints) { 2015 Mat B; 2016 2017 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2018 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2019 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2020 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2021 if (n_vertices) { 2022 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2023 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2024 } else { 2025 Mat S_VCt; 2026 2027 if (lda_rhs != n_R) { 2028 ierr = MatDestroy(&B);CHKERRQ(ierr); 2029 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2030 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2031 } 2032 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2033 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2034 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2035 } 2036 } 2037 ierr = MatDestroy(&B);CHKERRQ(ierr); 2038 /* coarse basis functions */ 2039 for (i=0;i<n_constraints;i++) { 2040 PetscScalar *y; 2041 2042 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2043 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2044 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2045 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2046 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2048 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2049 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2050 PetscInt j; 2051 2052 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2053 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2055 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2056 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2057 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2058 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2059 } 2060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2061 } 2062 } 2063 if (n_constraints) { 2064 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2065 } 2066 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2067 2068 /* coarse matrix entries relative to B_0 */ 2069 if (pcbddc->benign_n) { 2070 Mat B0_B,B0_BPHI; 2071 IS is_dummy; 2072 PetscScalar *data; 2073 PetscInt j; 2074 2075 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2076 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2077 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2078 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2079 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2081 for (j=0;j<pcbddc->benign_n;j++) { 2082 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2083 for (i=0;i<pcbddc->local_primal_size;i++) { 2084 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2085 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2089 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2090 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2091 } 2092 2093 /* compute other basis functions for non-symmetric problems */ 2094 if (!pcbddc->symmetric_primal) { 2095 Mat B_V=NULL,B_C=NULL; 2096 PetscScalar *marray; 2097 2098 if (n_constraints) { 2099 Mat S_CCT,C_CRT; 2100 2101 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2102 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2103 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2104 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2105 if (n_vertices) { 2106 Mat S_VCT; 2107 2108 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2109 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2110 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2111 } 2112 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2113 } 2114 if (n_vertices && n_R) { 2115 PetscScalar *av,*marray; 2116 const PetscInt *xadj,*adjncy; 2117 PetscInt n; 2118 PetscBool flg_row; 2119 2120 /* B_V = B_V - A_VR^T */ 2121 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2122 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2123 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2124 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2125 for (i=0;i<n;i++) { 2126 PetscInt j; 2127 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2128 } 2129 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2130 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2131 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2132 } 2133 2134 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2135 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2136 for (i=0;i<n_vertices;i++) { 2137 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2138 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2139 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2140 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2141 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2142 } 2143 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2144 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2145 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2146 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2147 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2148 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2149 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2150 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2151 } 2152 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2153 /* coarse basis functions */ 2154 for (i=0;i<pcbddc->local_primal_size;i++) { 2155 PetscScalar *y; 2156 2157 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2159 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2160 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2161 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2162 if (i<n_vertices) { 2163 y[n_B*i+idx_V_B[i]] = 1.0; 2164 } 2165 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2166 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2167 2168 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2169 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2170 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2171 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2172 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2173 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2174 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2175 } 2176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2177 } 2178 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2179 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2180 } 2181 /* free memory */ 2182 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2183 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2184 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2185 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2186 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2187 ierr = PetscFree(work);CHKERRQ(ierr); 2188 if (n_vertices) { 2189 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2190 } 2191 if (n_constraints) { 2192 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2193 } 2194 /* Checking coarse_sub_mat and coarse basis functios */ 2195 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2196 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2197 if (pcbddc->dbg_flag) { 2198 Mat coarse_sub_mat; 2199 Mat AUXMAT,TM1,TM2,TM3,TM4; 2200 Mat coarse_phi_D,coarse_phi_B; 2201 Mat coarse_psi_D,coarse_psi_B; 2202 Mat A_II,A_BB,A_IB,A_BI; 2203 Mat C_B,CPHI; 2204 IS is_dummy; 2205 Vec mones; 2206 MatType checkmattype=MATSEQAIJ; 2207 PetscReal real_value; 2208 2209 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2210 Mat A; 2211 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2212 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2213 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2214 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2215 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2216 ierr = MatDestroy(&A);CHKERRQ(ierr); 2217 } else { 2218 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2219 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2220 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2221 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2222 } 2223 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2224 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2225 if (!pcbddc->symmetric_primal) { 2226 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2227 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2228 } 2229 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2230 2231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2234 if (!pcbddc->symmetric_primal) { 2235 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2236 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2237 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2238 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2239 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2240 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2241 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2242 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2243 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2244 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2245 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2246 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2247 } else { 2248 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2249 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2250 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2251 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2252 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2253 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2254 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2255 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2256 } 2257 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2258 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2259 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2260 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2261 if (pcbddc->benign_n) { 2262 Mat B0_B,B0_BPHI; 2263 PetscScalar *data,*data2; 2264 PetscInt j; 2265 2266 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2267 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2268 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2269 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2270 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2271 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2272 for (j=0;j<pcbddc->benign_n;j++) { 2273 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2274 for (i=0;i<pcbddc->local_primal_size;i++) { 2275 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2276 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2277 } 2278 } 2279 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2280 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2281 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2283 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2284 } 2285 #if 0 2286 { 2287 PetscViewer viewer; 2288 char filename[256]; 2289 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2290 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2291 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2292 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2293 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2294 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2295 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2296 if (save_change) { 2297 Mat phi_B; 2298 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2299 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2300 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2301 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2302 } else { 2303 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2304 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2305 } 2306 if (pcbddc->coarse_phi_D) { 2307 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2308 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2309 } 2310 if (pcbddc->coarse_psi_B) { 2311 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2312 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2313 } 2314 if (pcbddc->coarse_psi_D) { 2315 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2316 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2317 } 2318 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2319 } 2320 #endif 2321 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2322 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2323 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2325 2326 /* check constraints */ 2327 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2328 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2329 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2330 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2331 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2332 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2333 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2334 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2336 if (!pcbddc->symmetric_primal) { 2337 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2338 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2339 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2340 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2341 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2342 } 2343 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2344 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2345 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2346 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2347 } 2348 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2349 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2350 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2351 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2352 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2353 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2354 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2355 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2356 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2357 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2358 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2359 if (!pcbddc->symmetric_primal) { 2360 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2361 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2362 } 2363 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2364 } 2365 /* get back data */ 2366 *coarse_submat_vals_n = coarse_submat_vals; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2372 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2373 { 2374 Mat *work_mat; 2375 IS isrow_s,iscol_s; 2376 PetscBool rsorted,csorted; 2377 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2378 PetscErrorCode ierr; 2379 2380 PetscFunctionBegin; 2381 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2382 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2383 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2384 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2385 2386 if (!rsorted) { 2387 const PetscInt *idxs; 2388 PetscInt *idxs_sorted,i; 2389 2390 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2391 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2392 for (i=0;i<rsize;i++) { 2393 idxs_perm_r[i] = i; 2394 } 2395 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2396 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2397 for (i=0;i<rsize;i++) { 2398 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2399 } 2400 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2401 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2402 } else { 2403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2404 isrow_s = isrow; 2405 } 2406 2407 if (!csorted) { 2408 if (isrow == iscol) { 2409 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2410 iscol_s = isrow_s; 2411 } else { 2412 const PetscInt *idxs; 2413 PetscInt *idxs_sorted,i; 2414 2415 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2416 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2417 for (i=0;i<csize;i++) { 2418 idxs_perm_c[i] = i; 2419 } 2420 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2421 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2422 for (i=0;i<csize;i++) { 2423 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2424 } 2425 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2426 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2427 } 2428 } else { 2429 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2430 iscol_s = iscol; 2431 } 2432 2433 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2434 2435 if (!rsorted || !csorted) { 2436 Mat new_mat; 2437 IS is_perm_r,is_perm_c; 2438 2439 if (!rsorted) { 2440 PetscInt *idxs_r,i; 2441 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2442 for (i=0;i<rsize;i++) { 2443 idxs_r[idxs_perm_r[i]] = i; 2444 } 2445 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2447 } else { 2448 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2449 } 2450 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2451 2452 if (!csorted) { 2453 if (isrow_s == iscol_s) { 2454 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2455 is_perm_c = is_perm_r; 2456 } else { 2457 PetscInt *idxs_c,i; 2458 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2459 for (i=0;i<csize;i++) { 2460 idxs_c[idxs_perm_c[i]] = i; 2461 } 2462 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2463 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2464 } 2465 } else { 2466 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2467 } 2468 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2469 2470 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2471 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2472 work_mat[0] = new_mat; 2473 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2474 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2475 } 2476 2477 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2478 *B = work_mat[0]; 2479 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2480 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2481 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2487 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2488 { 2489 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2490 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2491 Mat new_mat; 2492 IS is_local,is_global; 2493 PetscInt local_size; 2494 PetscBool isseqaij; 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2499 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2500 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2501 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2502 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2503 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2504 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2505 2506 /* check */ 2507 if (pcbddc->dbg_flag) { 2508 Vec x,x_change; 2509 PetscReal error; 2510 2511 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2512 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2513 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2514 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2515 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2517 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2519 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2520 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2523 ierr = VecDestroy(&x);CHKERRQ(ierr); 2524 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2525 } 2526 2527 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2528 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2529 if (isseqaij) { 2530 Mat M; 2531 2532 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2533 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2534 ierr = MatDestroy(&M);CHKERRQ(ierr); 2535 } else { 2536 Mat work_mat,M; 2537 2538 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2539 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2540 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2541 ierr = MatDestroy(&M);CHKERRQ(ierr); 2542 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2543 } 2544 if (matis->A->symmetric_set) { 2545 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2546 #if !defined(PETSC_USE_COMPLEX) 2547 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2548 #endif 2549 } 2550 /* 2551 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2552 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2553 */ 2554 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2560 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2561 { 2562 PC_IS* pcis = (PC_IS*)(pc->data); 2563 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2564 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2565 PetscInt *idx_R_local=NULL; 2566 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2567 PetscInt vbs,bs; 2568 PetscBT bitmask=NULL; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 /* 2573 No need to setup local scatters if 2574 - primal space is unchanged 2575 AND 2576 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2577 AND 2578 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2579 */ 2580 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2581 PetscFunctionReturn(0); 2582 } 2583 /* destroy old objects */ 2584 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2585 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2586 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2587 /* Set Non-overlapping dimensions */ 2588 n_B = pcis->n_B; 2589 n_D = pcis->n - n_B; 2590 n_vertices = pcbddc->n_vertices; 2591 2592 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2593 2594 /* create auxiliary bitmask and allocate workspace */ 2595 if (!sub_schurs->reuse_solver) { 2596 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2597 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2598 for (i=0;i<n_vertices;i++) { 2599 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2600 } 2601 2602 for (i=0, n_R=0; i<pcis->n; i++) { 2603 if (!PetscBTLookup(bitmask,i)) { 2604 idx_R_local[n_R++] = i; 2605 } 2606 } 2607 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2608 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2609 2610 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2612 } 2613 2614 /* Block code */ 2615 vbs = 1; 2616 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2617 if (bs>1 && !(n_vertices%bs)) { 2618 PetscBool is_blocked = PETSC_TRUE; 2619 PetscInt *vary; 2620 if (!sub_schurs->reuse_solver) { 2621 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2622 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2623 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2624 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2625 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2626 for (i=0; i<pcis->n/bs; i++) { 2627 if (vary[i]!=0 && vary[i]!=bs) { 2628 is_blocked = PETSC_FALSE; 2629 break; 2630 } 2631 } 2632 ierr = PetscFree(vary);CHKERRQ(ierr); 2633 } else { 2634 /* Verify directly the R set */ 2635 for (i=0; i<n_R/bs; i++) { 2636 PetscInt j,node=idx_R_local[bs*i]; 2637 for (j=1; j<bs; j++) { 2638 if (node != idx_R_local[bs*i+j]-j) { 2639 is_blocked = PETSC_FALSE; 2640 break; 2641 } 2642 } 2643 } 2644 } 2645 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2646 vbs = bs; 2647 for (i=0;i<n_R/vbs;i++) { 2648 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2649 } 2650 } 2651 } 2652 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2653 if (sub_schurs->reuse_solver) { 2654 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2655 2656 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2657 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2658 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2659 reuse_solver->is_R = pcbddc->is_R_local; 2660 } else { 2661 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2662 } 2663 2664 /* print some info if requested */ 2665 if (pcbddc->dbg_flag) { 2666 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2668 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2673 } 2674 2675 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2676 if (!sub_schurs->reuse_solver) { 2677 IS is_aux1,is_aux2; 2678 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2679 2680 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2681 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2682 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2683 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2684 for (i=0; i<n_D; i++) { 2685 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2686 } 2687 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2688 for (i=0, j=0; i<n_R; i++) { 2689 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2690 aux_array1[j++] = i; 2691 } 2692 } 2693 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2694 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2695 for (i=0, j=0; i<n_B; i++) { 2696 if (!PetscBTLookup(bitmask,is_indices[i])) { 2697 aux_array2[j++] = i; 2698 } 2699 } 2700 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2701 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2702 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2703 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2704 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2705 2706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2707 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2708 for (i=0, j=0; i<n_R; i++) { 2709 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2710 aux_array1[j++] = i; 2711 } 2712 } 2713 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2714 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2715 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2716 } 2717 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2718 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2719 } else { 2720 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2721 IS tis; 2722 PetscInt schur_size; 2723 2724 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2725 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2726 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2727 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2728 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2729 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2730 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2731 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2732 } 2733 } 2734 PetscFunctionReturn(0); 2735 } 2736 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2740 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2741 { 2742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2743 PC_IS *pcis = (PC_IS*)pc->data; 2744 PC pc_temp; 2745 Mat A_RR; 2746 MatReuse reuse; 2747 PetscScalar m_one = -1.0; 2748 PetscReal value; 2749 PetscInt n_D,n_R; 2750 PetscBool use_exact,use_exact_reduced,issbaij; 2751 PetscErrorCode ierr; 2752 /* prefixes stuff */ 2753 char dir_prefix[256],neu_prefix[256],str_level[16]; 2754 size_t len; 2755 2756 PetscFunctionBegin; 2757 2758 /* compute prefixes */ 2759 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2760 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2761 if (!pcbddc->current_level) { 2762 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2763 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2764 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2765 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2766 } else { 2767 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2768 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2769 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2770 len -= 15; /* remove "pc_bddc_coarse_" */ 2771 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2772 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2773 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2774 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2775 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2776 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2777 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2778 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2779 } 2780 2781 /* DIRICHLET PROBLEM */ 2782 if (dirichlet) { 2783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2784 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2785 if (!sub_schurs->reuse_solver) { 2786 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2787 } 2788 if (pcbddc->dbg_flag) { 2789 Mat A_IIn; 2790 2791 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2792 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2793 pcis->A_II = A_IIn; 2794 } 2795 } 2796 if (pcbddc->local_mat->symmetric_set) { 2797 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2798 } 2799 /* Matrix for Dirichlet problem is pcis->A_II */ 2800 n_D = pcis->n - pcis->n_B; 2801 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2802 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2803 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2804 /* default */ 2805 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2806 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2807 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2808 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2809 if (issbaij) { 2810 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2811 } else { 2812 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2813 } 2814 /* Allow user's customization */ 2815 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2816 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2817 } 2818 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2819 if (sub_schurs->reuse_solver) { 2820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2821 2822 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2823 } 2824 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2825 if (!n_D) { 2826 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2827 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2828 } 2829 /* Set Up KSP for Dirichlet problem of BDDC */ 2830 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2831 /* set ksp_D into pcis data */ 2832 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2833 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2834 pcis->ksp_D = pcbddc->ksp_D; 2835 } 2836 2837 /* NEUMANN PROBLEM */ 2838 A_RR = 0; 2839 if (neumann) { 2840 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2841 PetscInt ibs,mbs; 2842 PetscBool issbaij; 2843 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2844 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2845 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2846 if (pcbddc->ksp_R) { /* already created ksp */ 2847 PetscInt nn_R; 2848 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2849 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2850 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2851 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2852 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2853 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2854 reuse = MAT_INITIAL_MATRIX; 2855 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2856 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2857 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2858 reuse = MAT_INITIAL_MATRIX; 2859 } else { /* safe to reuse the matrix */ 2860 reuse = MAT_REUSE_MATRIX; 2861 } 2862 } 2863 /* last check */ 2864 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2865 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2866 reuse = MAT_INITIAL_MATRIX; 2867 } 2868 } else { /* first time, so we need to create the matrix */ 2869 reuse = MAT_INITIAL_MATRIX; 2870 } 2871 /* extract A_RR */ 2872 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2873 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2874 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2875 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2876 if (matis->A == pcbddc->local_mat) { 2877 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2878 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2879 } else { 2880 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2881 } 2882 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2883 if (matis->A == pcbddc->local_mat) { 2884 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2885 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2886 } else { 2887 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2888 } 2889 } 2890 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2891 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2892 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2893 } else { 2894 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2895 } 2896 if (pcbddc->local_mat->symmetric_set) { 2897 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2898 } 2899 if (!pcbddc->ksp_R) { /* create object if not present */ 2900 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2901 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2902 /* default */ 2903 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2904 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2905 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2906 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2907 if (issbaij) { 2908 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2909 } else { 2910 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2911 } 2912 /* Allow user's customization */ 2913 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2914 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2915 } 2916 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2917 if (!n_R) { 2918 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2919 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2920 } 2921 /* Reuse solver if it is present */ 2922 if (sub_schurs->reuse_solver) { 2923 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2924 2925 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2926 } 2927 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2928 /* Set Up KSP for Neumann problem of BDDC */ 2929 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2930 } 2931 /* free Neumann problem's matrix */ 2932 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2933 2934 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2935 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2938 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2939 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2940 } 2941 if (dirichlet) { /* Dirichlet */ 2942 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2943 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2944 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2945 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2946 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2947 /* need to be adapted? */ 2948 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2949 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2950 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2951 /* print info */ 2952 if (pcbddc->dbg_flag) { 2953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2955 } 2956 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2957 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2958 } 2959 } 2960 if (neumann) { /* Neumann */ 2961 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2962 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2963 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2964 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2965 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2966 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2967 /* need to be adapted? */ 2968 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2970 /* print info */ 2971 if (pcbddc->dbg_flag) { 2972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2974 } 2975 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2976 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2985 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2986 { 2987 PetscErrorCode ierr; 2988 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2989 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2990 2991 PetscFunctionBegin; 2992 if (!sub_schurs->reuse_solver) { 2993 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2994 } 2995 if (!pcbddc->switch_static) { 2996 if (applytranspose && pcbddc->local_auxmat1) { 2997 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2998 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2999 } 3000 if (!sub_schurs->reuse_solver) { 3001 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3002 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3003 } else { 3004 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3005 3006 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3008 } 3009 } else { 3010 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3011 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3012 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3013 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3014 if (applytranspose && pcbddc->local_auxmat1) { 3015 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3016 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3017 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3018 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3019 } 3020 } 3021 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3022 if (applytranspose) { 3023 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3024 } else { 3025 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3026 } 3027 } else { 3028 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3029 3030 if (applytranspose) { 3031 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3032 } else { 3033 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3034 } 3035 } 3036 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3037 if (!pcbddc->switch_static) { 3038 if (!sub_schurs->reuse_solver) { 3039 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3040 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3041 } else { 3042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3043 3044 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3045 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3046 } 3047 if (!applytranspose && pcbddc->local_auxmat1) { 3048 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3049 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3050 } 3051 } else { 3052 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 if (!applytranspose && pcbddc->local_auxmat1) { 3057 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3058 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3059 } 3060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3061 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 } 3065 PetscFunctionReturn(0); 3066 } 3067 3068 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3071 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3072 { 3073 PetscErrorCode ierr; 3074 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3075 PC_IS* pcis = (PC_IS*) (pc->data); 3076 const PetscScalar zero = 0.0; 3077 3078 PetscFunctionBegin; 3079 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3080 if (!pcbddc->benign_apply_coarse_only) { 3081 if (applytranspose) { 3082 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3083 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3084 } else { 3085 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3086 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3087 } 3088 } else { 3089 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3090 } 3091 3092 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3093 if (pcbddc->benign_n) { 3094 PetscScalar *array; 3095 PetscInt j; 3096 3097 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3098 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3099 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3100 } 3101 3102 /* start communications from local primal nodes to rhs of coarse solver */ 3103 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3104 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 3107 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3108 /* TODO remove null space when doing multilevel */ 3109 if (pcbddc->coarse_ksp) { 3110 Mat coarse_mat; 3111 Vec rhs,sol; 3112 MatNullSpace nullsp; 3113 3114 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3115 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3116 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3117 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3118 if (nullsp) { 3119 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3120 } 3121 if (applytranspose) { 3122 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3123 } else { 3124 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3125 } 3126 if (nullsp) { 3127 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3128 } 3129 } 3130 3131 /* Local solution on R nodes */ 3132 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3133 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3134 } 3135 /* communications from coarse sol to local primal nodes */ 3136 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3137 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3138 3139 /* Sum contributions from the two levels */ 3140 if (!pcbddc->benign_apply_coarse_only) { 3141 if (applytranspose) { 3142 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3143 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3144 } else { 3145 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3146 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3147 } 3148 /* store p0 */ 3149 if (pcbddc->benign_n) { 3150 PetscScalar *array; 3151 PetscInt j; 3152 3153 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3154 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3155 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3156 } 3157 } else { /* expand the coarse solution */ 3158 if (applytranspose) { 3159 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3160 } else { 3161 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3162 } 3163 } 3164 PetscFunctionReturn(0); 3165 } 3166 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3169 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3170 { 3171 PetscErrorCode ierr; 3172 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3173 PetscScalar *array; 3174 Vec from,to; 3175 3176 PetscFunctionBegin; 3177 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3178 from = pcbddc->coarse_vec; 3179 to = pcbddc->vec1_P; 3180 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3181 Vec tvec; 3182 3183 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3184 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3185 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3186 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3187 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3188 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3189 } 3190 } else { /* from local to global -> put data in coarse right hand side */ 3191 from = pcbddc->vec1_P; 3192 to = pcbddc->coarse_vec; 3193 } 3194 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3200 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3201 { 3202 PetscErrorCode ierr; 3203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3204 PetscScalar *array; 3205 Vec from,to; 3206 3207 PetscFunctionBegin; 3208 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3209 from = pcbddc->coarse_vec; 3210 to = pcbddc->vec1_P; 3211 } else { /* from local to global -> put data in coarse right hand side */ 3212 from = pcbddc->vec1_P; 3213 to = pcbddc->coarse_vec; 3214 } 3215 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3216 if (smode == SCATTER_FORWARD) { 3217 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3218 Vec tvec; 3219 3220 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3221 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3222 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3223 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3224 } 3225 } else { 3226 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3227 ierr = VecResetArray(from);CHKERRQ(ierr); 3228 } 3229 } 3230 PetscFunctionReturn(0); 3231 } 3232 3233 /* uncomment for testing purposes */ 3234 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3237 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3238 { 3239 PetscErrorCode ierr; 3240 PC_IS* pcis = (PC_IS*)(pc->data); 3241 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3243 /* one and zero */ 3244 PetscScalar one=1.0,zero=0.0; 3245 /* space to store constraints and their local indices */ 3246 PetscScalar *constraints_data; 3247 PetscInt *constraints_idxs,*constraints_idxs_B; 3248 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3249 PetscInt *constraints_n; 3250 /* iterators */ 3251 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3252 /* BLAS integers */ 3253 PetscBLASInt lwork,lierr; 3254 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3255 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3256 /* reuse */ 3257 PetscInt olocal_primal_size,olocal_primal_size_cc; 3258 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3259 /* change of basis */ 3260 PetscBool qr_needed; 3261 PetscBT change_basis,qr_needed_idx; 3262 /* auxiliary stuff */ 3263 PetscInt *nnz,*is_indices; 3264 PetscInt ncc; 3265 /* some quantities */ 3266 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3267 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3268 3269 PetscFunctionBegin; 3270 /* Destroy Mat objects computed previously */ 3271 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3272 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3273 /* save info on constraints from previous setup (if any) */ 3274 olocal_primal_size = pcbddc->local_primal_size; 3275 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3276 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3277 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3278 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3279 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3280 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3281 3282 /* print some info */ 3283 if (pcbddc->dbg_flag) { 3284 IS vertices; 3285 PetscInt nv,nedges,nfaces; 3286 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3287 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3288 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3289 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3295 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3296 } 3297 3298 if (!pcbddc->adaptive_selection) { 3299 IS ISForVertices,*ISForFaces,*ISForEdges; 3300 MatNullSpace nearnullsp; 3301 const Vec *nearnullvecs; 3302 Vec *localnearnullsp; 3303 PetscScalar *array; 3304 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3305 PetscBool nnsp_has_cnst; 3306 /* LAPACK working arrays for SVD or POD */ 3307 PetscBool skip_lapack,boolforchange; 3308 PetscScalar *work; 3309 PetscReal *singular_vals; 3310 #if defined(PETSC_USE_COMPLEX) 3311 PetscReal *rwork; 3312 #endif 3313 #if defined(PETSC_MISSING_LAPACK_GESVD) 3314 PetscScalar *temp_basis,*correlation_mat; 3315 #else 3316 PetscBLASInt dummy_int=1; 3317 PetscScalar dummy_scalar=1.; 3318 #endif 3319 3320 /* Get index sets for faces, edges and vertices from graph */ 3321 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3322 /* free unneeded index sets */ 3323 if (!pcbddc->use_vertices) { 3324 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3325 } 3326 if (!pcbddc->use_edges) { 3327 for (i=0;i<n_ISForEdges;i++) { 3328 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3329 } 3330 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3331 n_ISForEdges = 0; 3332 } 3333 if (!pcbddc->use_faces) { 3334 for (i=0;i<n_ISForFaces;i++) { 3335 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3336 } 3337 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3338 n_ISForFaces = 0; 3339 } 3340 3341 #if defined(PETSC_USE_DEBUG) 3342 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3343 Also use_change_of_basis should be consistent among processors */ 3344 if (pcbddc->NullSpace) { 3345 PetscBool tbool[2],gbool[2]; 3346 3347 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3348 pcbddc->use_change_of_basis = PETSC_TRUE; 3349 if (!ISForEdges) { 3350 pcbddc->use_change_on_faces = PETSC_TRUE; 3351 } 3352 } 3353 tbool[0] = pcbddc->use_change_of_basis; 3354 tbool[1] = pcbddc->use_change_on_faces; 3355 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3356 pcbddc->use_change_of_basis = gbool[0]; 3357 pcbddc->use_change_on_faces = gbool[1]; 3358 } 3359 #endif 3360 3361 /* check if near null space is attached to global mat */ 3362 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3363 if (nearnullsp) { 3364 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3365 /* remove any stored info */ 3366 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3367 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3368 /* store information for BDDC solver reuse */ 3369 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3370 pcbddc->onearnullspace = nearnullsp; 3371 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3372 for (i=0;i<nnsp_size;i++) { 3373 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3374 } 3375 } else { /* if near null space is not provided BDDC uses constants by default */ 3376 nnsp_size = 0; 3377 nnsp_has_cnst = PETSC_TRUE; 3378 } 3379 /* get max number of constraints on a single cc */ 3380 max_constraints = nnsp_size; 3381 if (nnsp_has_cnst) max_constraints++; 3382 3383 /* 3384 Evaluate maximum storage size needed by the procedure 3385 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3386 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3387 There can be multiple constraints per connected component 3388 */ 3389 n_vertices = 0; 3390 if (ISForVertices) { 3391 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3392 } 3393 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3394 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3395 3396 total_counts = n_ISForFaces+n_ISForEdges; 3397 total_counts *= max_constraints; 3398 total_counts += n_vertices; 3399 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3400 3401 total_counts = 0; 3402 max_size_of_constraint = 0; 3403 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3404 IS used_is; 3405 if (i<n_ISForEdges) { 3406 used_is = ISForEdges[i]; 3407 } else { 3408 used_is = ISForFaces[i-n_ISForEdges]; 3409 } 3410 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3411 total_counts += j; 3412 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3413 } 3414 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3415 3416 /* get local part of global near null space vectors */ 3417 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3418 for (k=0;k<nnsp_size;k++) { 3419 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3420 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 } 3423 3424 /* whether or not to skip lapack calls */ 3425 skip_lapack = PETSC_TRUE; 3426 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3427 3428 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3429 if (!skip_lapack) { 3430 PetscScalar temp_work; 3431 3432 #if defined(PETSC_MISSING_LAPACK_GESVD) 3433 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3434 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3435 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3436 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3437 #if defined(PETSC_USE_COMPLEX) 3438 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3439 #endif 3440 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3441 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3442 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3443 lwork = -1; 3444 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3445 #if !defined(PETSC_USE_COMPLEX) 3446 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3447 #else 3448 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3449 #endif 3450 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3451 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3452 #else /* on missing GESVD */ 3453 /* SVD */ 3454 PetscInt max_n,min_n; 3455 max_n = max_size_of_constraint; 3456 min_n = max_constraints; 3457 if (max_size_of_constraint < max_constraints) { 3458 min_n = max_size_of_constraint; 3459 max_n = max_constraints; 3460 } 3461 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3462 #if defined(PETSC_USE_COMPLEX) 3463 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3464 #endif 3465 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3466 lwork = -1; 3467 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3468 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3469 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3470 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3471 #if !defined(PETSC_USE_COMPLEX) 3472 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3473 #else 3474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3475 #endif 3476 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3478 #endif /* on missing GESVD */ 3479 /* Allocate optimal workspace */ 3480 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3482 } 3483 /* Now we can loop on constraining sets */ 3484 total_counts = 0; 3485 constraints_idxs_ptr[0] = 0; 3486 constraints_data_ptr[0] = 0; 3487 /* vertices */ 3488 if (n_vertices) { 3489 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3490 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3491 for (i=0;i<n_vertices;i++) { 3492 constraints_n[total_counts] = 1; 3493 constraints_data[total_counts] = 1.0; 3494 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3495 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3496 total_counts++; 3497 } 3498 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3499 n_vertices = total_counts; 3500 } 3501 3502 /* edges and faces */ 3503 total_counts_cc = total_counts; 3504 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3505 IS used_is; 3506 PetscBool idxs_copied = PETSC_FALSE; 3507 3508 if (ncc<n_ISForEdges) { 3509 used_is = ISForEdges[ncc]; 3510 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3511 } else { 3512 used_is = ISForFaces[ncc-n_ISForEdges]; 3513 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3514 } 3515 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3516 3517 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3518 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3519 /* change of basis should not be performed on local periodic nodes */ 3520 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3521 if (nnsp_has_cnst) { 3522 PetscScalar quad_value; 3523 3524 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3525 idxs_copied = PETSC_TRUE; 3526 3527 if (!pcbddc->use_nnsp_true) { 3528 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3529 } else { 3530 quad_value = 1.0; 3531 } 3532 for (j=0;j<size_of_constraint;j++) { 3533 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3534 } 3535 temp_constraints++; 3536 total_counts++; 3537 } 3538 for (k=0;k<nnsp_size;k++) { 3539 PetscReal real_value; 3540 PetscScalar *ptr_to_data; 3541 3542 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3543 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3544 for (j=0;j<size_of_constraint;j++) { 3545 ptr_to_data[j] = array[is_indices[j]]; 3546 } 3547 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3548 /* check if array is null on the connected component */ 3549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3550 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3551 if (real_value > 0.0) { /* keep indices and values */ 3552 temp_constraints++; 3553 total_counts++; 3554 if (!idxs_copied) { 3555 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3556 idxs_copied = PETSC_TRUE; 3557 } 3558 } 3559 } 3560 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3561 valid_constraints = temp_constraints; 3562 if (!pcbddc->use_nnsp_true && temp_constraints) { 3563 if (temp_constraints == 1) { /* just normalize the constraint */ 3564 PetscScalar norm,*ptr_to_data; 3565 3566 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3568 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3569 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3570 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3571 } else { /* perform SVD */ 3572 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3573 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3574 3575 #if defined(PETSC_MISSING_LAPACK_GESVD) 3576 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3577 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3578 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3579 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3580 from that computed using LAPACKgesvd 3581 -> This is due to a different computation of eigenvectors in LAPACKheev 3582 -> The quality of the POD-computed basis will be the same */ 3583 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3584 /* Store upper triangular part of correlation matrix */ 3585 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3587 for (j=0;j<temp_constraints;j++) { 3588 for (k=0;k<j+1;k++) { 3589 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3590 } 3591 } 3592 /* compute eigenvalues and eigenvectors of correlation matrix */ 3593 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3594 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3595 #if !defined(PETSC_USE_COMPLEX) 3596 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3597 #else 3598 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3599 #endif 3600 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3601 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3602 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3603 j = 0; 3604 while (j < temp_constraints && singular_vals[j] < tol) j++; 3605 total_counts = total_counts-j; 3606 valid_constraints = temp_constraints-j; 3607 /* scale and copy POD basis into used quadrature memory */ 3608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3609 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3610 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3611 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3612 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3614 if (j<temp_constraints) { 3615 PetscInt ii; 3616 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3617 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3618 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3619 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3620 for (k=0;k<temp_constraints-j;k++) { 3621 for (ii=0;ii<size_of_constraint;ii++) { 3622 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3623 } 3624 } 3625 } 3626 #else /* on missing GESVD */ 3627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3628 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3629 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3630 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3631 #if !defined(PETSC_USE_COMPLEX) 3632 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3633 #else 3634 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3635 #endif 3636 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3637 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3638 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3639 k = temp_constraints; 3640 if (k > size_of_constraint) k = size_of_constraint; 3641 j = 0; 3642 while (j < k && singular_vals[k-j-1] < tol) j++; 3643 valid_constraints = k-j; 3644 total_counts = total_counts-temp_constraints+valid_constraints; 3645 #endif /* on missing GESVD */ 3646 } 3647 } 3648 /* update pointers information */ 3649 if (valid_constraints) { 3650 constraints_n[total_counts_cc] = valid_constraints; 3651 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3652 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3653 /* set change_of_basis flag */ 3654 if (boolforchange) { 3655 PetscBTSet(change_basis,total_counts_cc); 3656 } 3657 total_counts_cc++; 3658 } 3659 } 3660 /* free workspace */ 3661 if (!skip_lapack) { 3662 ierr = PetscFree(work);CHKERRQ(ierr); 3663 #if defined(PETSC_USE_COMPLEX) 3664 ierr = PetscFree(rwork);CHKERRQ(ierr); 3665 #endif 3666 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3667 #if defined(PETSC_MISSING_LAPACK_GESVD) 3668 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3669 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3670 #endif 3671 } 3672 for (k=0;k<nnsp_size;k++) { 3673 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3674 } 3675 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3676 /* free index sets of faces, edges and vertices */ 3677 for (i=0;i<n_ISForFaces;i++) { 3678 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3679 } 3680 if (n_ISForFaces) { 3681 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3682 } 3683 for (i=0;i<n_ISForEdges;i++) { 3684 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3685 } 3686 if (n_ISForEdges) { 3687 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3688 } 3689 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3690 } else { 3691 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3692 3693 total_counts = 0; 3694 n_vertices = 0; 3695 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3696 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3697 } 3698 max_constraints = 0; 3699 total_counts_cc = 0; 3700 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3701 total_counts += pcbddc->adaptive_constraints_n[i]; 3702 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3703 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3704 } 3705 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3706 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3707 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3708 constraints_data = pcbddc->adaptive_constraints_data; 3709 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3710 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3711 total_counts_cc = 0; 3712 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3713 if (pcbddc->adaptive_constraints_n[i]) { 3714 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3715 } 3716 } 3717 #if 0 3718 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3719 for (i=0;i<total_counts_cc;i++) { 3720 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3721 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3722 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3723 printf(" %d",constraints_idxs[j]); 3724 } 3725 printf("\n"); 3726 printf("number of cc: %d\n",constraints_n[i]); 3727 } 3728 for (i=0;i<n_vertices;i++) { 3729 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3730 } 3731 for (i=0;i<sub_schurs->n_subs;i++) { 3732 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3733 } 3734 #endif 3735 3736 max_size_of_constraint = 0; 3737 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3738 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3739 /* Change of basis */ 3740 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3741 if (pcbddc->use_change_of_basis) { 3742 for (i=0;i<sub_schurs->n_subs;i++) { 3743 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3744 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3745 } 3746 } 3747 } 3748 } 3749 pcbddc->local_primal_size = total_counts; 3750 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3751 3752 /* map constraints_idxs in boundary numbering */ 3753 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3754 if (i != constraints_idxs_ptr[total_counts_cc]) { 3755 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3756 } 3757 3758 /* Create constraint matrix */ 3759 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3760 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3761 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3762 3763 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3764 /* determine if a QR strategy is needed for change of basis */ 3765 qr_needed = PETSC_FALSE; 3766 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3767 total_primal_vertices=0; 3768 pcbddc->local_primal_size_cc = 0; 3769 for (i=0;i<total_counts_cc;i++) { 3770 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3771 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3772 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3773 pcbddc->local_primal_size_cc += 1; 3774 } else if (PetscBTLookup(change_basis,i)) { 3775 for (k=0;k<constraints_n[i];k++) { 3776 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3777 } 3778 pcbddc->local_primal_size_cc += constraints_n[i]; 3779 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3780 PetscBTSet(qr_needed_idx,i); 3781 qr_needed = PETSC_TRUE; 3782 } 3783 } else { 3784 pcbddc->local_primal_size_cc += 1; 3785 } 3786 } 3787 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3788 pcbddc->n_vertices = total_primal_vertices; 3789 /* permute indices in order to have a sorted set of vertices */ 3790 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3791 3792 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3793 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3794 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3795 3796 /* nonzero structure of constraint matrix */ 3797 /* and get reference dof for local constraints */ 3798 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3799 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3800 3801 j = total_primal_vertices; 3802 total_counts = total_primal_vertices; 3803 cum = total_primal_vertices; 3804 for (i=n_vertices;i<total_counts_cc;i++) { 3805 if (!PetscBTLookup(change_basis,i)) { 3806 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3807 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3808 cum++; 3809 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3810 for (k=0;k<constraints_n[i];k++) { 3811 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3812 nnz[j+k] = size_of_constraint; 3813 } 3814 j += constraints_n[i]; 3815 } 3816 } 3817 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3818 ierr = PetscFree(nnz);CHKERRQ(ierr); 3819 3820 /* set values in constraint matrix */ 3821 for (i=0;i<total_primal_vertices;i++) { 3822 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3823 } 3824 total_counts = total_primal_vertices; 3825 for (i=n_vertices;i<total_counts_cc;i++) { 3826 if (!PetscBTLookup(change_basis,i)) { 3827 PetscInt *cols; 3828 3829 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3830 cols = constraints_idxs+constraints_idxs_ptr[i]; 3831 for (k=0;k<constraints_n[i];k++) { 3832 PetscInt row = total_counts+k; 3833 PetscScalar *vals; 3834 3835 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3836 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3837 } 3838 total_counts += constraints_n[i]; 3839 } 3840 } 3841 /* assembling */ 3842 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 3845 /* 3846 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3847 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3848 */ 3849 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3850 if (pcbddc->use_change_of_basis) { 3851 /* dual and primal dofs on a single cc */ 3852 PetscInt dual_dofs,primal_dofs; 3853 /* working stuff for GEQRF */ 3854 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3855 PetscBLASInt lqr_work; 3856 /* working stuff for UNGQR */ 3857 PetscScalar *gqr_work,lgqr_work_t; 3858 PetscBLASInt lgqr_work; 3859 /* working stuff for TRTRS */ 3860 PetscScalar *trs_rhs; 3861 PetscBLASInt Blas_NRHS; 3862 /* pointers for values insertion into change of basis matrix */ 3863 PetscInt *start_rows,*start_cols; 3864 PetscScalar *start_vals; 3865 /* working stuff for values insertion */ 3866 PetscBT is_primal; 3867 PetscInt *aux_primal_numbering_B; 3868 /* matrix sizes */ 3869 PetscInt global_size,local_size; 3870 /* temporary change of basis */ 3871 Mat localChangeOfBasisMatrix; 3872 /* extra space for debugging */ 3873 PetscScalar *dbg_work; 3874 3875 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3876 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3877 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3878 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3879 /* nonzeros for local mat */ 3880 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3881 for (i=0;i<pcis->n;i++) nnz[i]=1; 3882 for (i=n_vertices;i<total_counts_cc;i++) { 3883 if (PetscBTLookup(change_basis,i)) { 3884 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3885 if (PetscBTLookup(qr_needed_idx,i)) { 3886 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3887 } else { 3888 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3889 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3890 } 3891 } 3892 } 3893 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3894 ierr = PetscFree(nnz);CHKERRQ(ierr); 3895 /* Set initial identity in the matrix */ 3896 for (i=0;i<pcis->n;i++) { 3897 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3898 } 3899 3900 if (pcbddc->dbg_flag) { 3901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3903 } 3904 3905 3906 /* Now we loop on the constraints which need a change of basis */ 3907 /* 3908 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3909 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3910 3911 Basic blocks of change of basis matrix T computed by 3912 3913 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3914 3915 | 1 0 ... 0 s_1/S | 3916 | 0 1 ... 0 s_2/S | 3917 | ... | 3918 | 0 ... 1 s_{n-1}/S | 3919 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3920 3921 with S = \sum_{i=1}^n s_i^2 3922 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3923 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3924 3925 - QR decomposition of constraints otherwise 3926 */ 3927 if (qr_needed) { 3928 /* space to store Q */ 3929 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3930 /* first we issue queries for optimal work */ 3931 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3932 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3933 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3934 lqr_work = -1; 3935 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3936 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3937 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3938 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3939 lgqr_work = -1; 3940 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3941 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3942 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3944 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3945 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3946 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3947 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3948 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3949 /* array to store scaling factors for reflectors */ 3950 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3951 /* array to store rhs and solution of triangular solver */ 3952 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3953 /* allocating workspace for check */ 3954 if (pcbddc->dbg_flag) { 3955 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3956 } 3957 } 3958 /* array to store whether a node is primal or not */ 3959 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3960 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3961 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3962 if (i != total_primal_vertices) { 3963 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3964 } 3965 for (i=0;i<total_primal_vertices;i++) { 3966 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3967 } 3968 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3969 3970 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3971 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3972 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3973 if (PetscBTLookup(change_basis,total_counts)) { 3974 /* get constraint info */ 3975 primal_dofs = constraints_n[total_counts]; 3976 dual_dofs = size_of_constraint-primal_dofs; 3977 3978 if (pcbddc->dbg_flag) { 3979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3980 } 3981 3982 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3983 3984 /* copy quadrature constraints for change of basis check */ 3985 if (pcbddc->dbg_flag) { 3986 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3987 } 3988 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3989 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3990 3991 /* compute QR decomposition of constraints */ 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3996 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3997 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3998 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3999 4000 /* explictly compute R^-T */ 4001 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4002 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4003 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4006 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4007 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4008 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4009 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4011 4012 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4015 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4016 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4017 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4018 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4019 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4021 4022 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4023 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4024 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4025 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4026 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4027 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4029 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4031 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4032 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4033 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4034 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4035 4036 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4037 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4038 /* insert cols for primal dofs */ 4039 for (j=0;j<primal_dofs;j++) { 4040 start_vals = &qr_basis[j*size_of_constraint]; 4041 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4042 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4043 } 4044 /* insert cols for dual dofs */ 4045 for (j=0,k=0;j<dual_dofs;k++) { 4046 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4047 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4048 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4049 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4050 j++; 4051 } 4052 } 4053 4054 /* check change of basis */ 4055 if (pcbddc->dbg_flag) { 4056 PetscInt ii,jj; 4057 PetscBool valid_qr=PETSC_TRUE; 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4063 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4064 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4065 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4066 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4067 for (jj=0;jj<size_of_constraint;jj++) { 4068 for (ii=0;ii<primal_dofs;ii++) { 4069 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4070 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4071 } 4072 } 4073 if (!valid_qr) { 4074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4075 for (jj=0;jj<size_of_constraint;jj++) { 4076 for (ii=0;ii<primal_dofs;ii++) { 4077 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4078 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4079 } 4080 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4081 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4082 } 4083 } 4084 } 4085 } else { 4086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4087 } 4088 } 4089 } else { /* simple transformation block */ 4090 PetscInt row,col; 4091 PetscScalar val,norm; 4092 4093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4094 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4095 for (j=0;j<size_of_constraint;j++) { 4096 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4097 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4098 if (!PetscBTLookup(is_primal,row_B)) { 4099 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4100 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4101 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4102 } else { 4103 for (k=0;k<size_of_constraint;k++) { 4104 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4105 if (row != col) { 4106 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4107 } else { 4108 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4109 } 4110 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4111 } 4112 } 4113 } 4114 if (pcbddc->dbg_flag) { 4115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4116 } 4117 } 4118 } else { 4119 if (pcbddc->dbg_flag) { 4120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4121 } 4122 } 4123 } 4124 4125 /* free workspace */ 4126 if (qr_needed) { 4127 if (pcbddc->dbg_flag) { 4128 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4129 } 4130 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4131 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4132 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4133 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4134 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4135 } 4136 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4137 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 4140 /* assembling of global change of variable */ 4141 if (!pcbddc->fake_change) { 4142 Mat tmat; 4143 PetscInt bs; 4144 4145 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4146 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4147 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4148 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4149 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4150 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4151 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4152 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4153 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4154 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4155 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4156 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4157 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4158 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4159 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4160 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4161 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4162 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4163 4164 /* check */ 4165 if (pcbddc->dbg_flag) { 4166 PetscReal error; 4167 Vec x,x_change; 4168 4169 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4170 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4171 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4172 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4173 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4174 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4175 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4176 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4177 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4178 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4179 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4180 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4183 ierr = VecDestroy(&x);CHKERRQ(ierr); 4184 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4185 } 4186 /* adapt sub_schurs computed (if any) */ 4187 if (pcbddc->use_deluxe_scaling) { 4188 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4189 4190 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4191 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4192 } 4193 if (sub_schurs->S_Ej_all) { 4194 Mat S_new,tmat; 4195 IS is_all_N,is_V_Sall = NULL; 4196 4197 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4198 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4199 if (pcbddc->deluxe_zerorows) { 4200 ISLocalToGlobalMapping NtoSall; 4201 IS is_V; 4202 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4203 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4204 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4205 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4206 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4207 } 4208 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4209 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4210 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4211 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4212 if (pcbddc->deluxe_zerorows) { 4213 const PetscScalar *array; 4214 const PetscInt *idxs_V,*idxs_all; 4215 PetscInt i,n_V; 4216 4217 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4218 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4219 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4220 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4221 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4222 for (i=0;i<n_V;i++) { 4223 PetscScalar val; 4224 PetscInt idx; 4225 4226 idx = idxs_V[i]; 4227 val = array[idxs_all[idxs_V[i]]]; 4228 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4229 } 4230 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4231 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4232 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4233 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4234 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4235 } 4236 sub_schurs->S_Ej_all = S_new; 4237 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4238 if (sub_schurs->sum_S_Ej_all) { 4239 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4240 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4241 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4242 if (pcbddc->deluxe_zerorows) { 4243 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4244 } 4245 sub_schurs->sum_S_Ej_all = S_new; 4246 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4247 } 4248 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4249 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4250 } 4251 /* destroy any change of basis context in sub_schurs */ 4252 if (sub_schurs->change) { 4253 PetscInt i; 4254 4255 for (i=0;i<sub_schurs->n_subs;i++) { 4256 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4257 } 4258 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4259 } 4260 } 4261 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4262 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4263 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4264 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4265 pcbddc->use_qr_single = qr_needed; 4266 } 4267 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4268 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4269 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4270 } 4271 4272 /* set up change of basis context */ 4273 if (pcbddc->ChangeOfBasisMatrix) { 4274 PCBDDCChange_ctx change_ctx; 4275 4276 if (!pcbddc->new_global_mat) { 4277 PetscInt global_size,local_size; 4278 4279 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4280 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4281 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4282 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4283 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4284 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4285 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4286 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4287 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4288 } else { 4289 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4290 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4291 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4292 } 4293 if (!pcbddc->user_ChangeOfBasisMatrix) { 4294 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4295 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4296 } else { 4297 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4298 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4299 } 4300 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4301 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4302 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4303 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4304 } else { 4305 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4306 } 4307 4308 if (!pcbddc->fake_change) { 4309 /* add pressure dofs to set of primal nodes for numbering purposes */ 4310 for (i=0;i<pcbddc->benign_n;i++) { 4311 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4312 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4313 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4314 pcbddc->local_primal_size_cc++; 4315 pcbddc->local_primal_size++; 4316 } 4317 4318 /* check if a new primal space has been introduced (also take into account benign trick) */ 4319 pcbddc->new_primal_space_local = PETSC_TRUE; 4320 if (olocal_primal_size == pcbddc->local_primal_size) { 4321 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4322 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4323 if (!pcbddc->new_primal_space_local) { 4324 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4325 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4326 } 4327 } 4328 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4329 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4330 } 4331 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4332 4333 /* flush dbg viewer */ 4334 if (pcbddc->dbg_flag) { 4335 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4336 } 4337 4338 /* free workspace */ 4339 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4340 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4341 if (!pcbddc->adaptive_selection) { 4342 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4343 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4344 } else { 4345 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4346 pcbddc->adaptive_constraints_idxs_ptr, 4347 pcbddc->adaptive_constraints_data_ptr, 4348 pcbddc->adaptive_constraints_idxs, 4349 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4350 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4351 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4352 } 4353 PetscFunctionReturn(0); 4354 } 4355 4356 #undef __FUNCT__ 4357 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4358 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4359 { 4360 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4361 PC_IS *pcis = (PC_IS*)pc->data; 4362 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4363 PetscInt ierr,i,N; 4364 PetscViewer viewer=pcbddc->dbg_viewer; 4365 4366 PetscFunctionBegin; 4367 /* Reset previously computed graph */ 4368 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4369 /* Init local Graph struct */ 4370 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4371 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4372 4373 /* Check validity of the csr graph passed in by the user */ 4374 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4375 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4376 } 4377 4378 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4379 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4380 PetscInt *xadj,*adjncy; 4381 PetscInt nvtxs; 4382 PetscBool flg_row=PETSC_FALSE; 4383 4384 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4385 if (flg_row) { 4386 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4387 pcbddc->computed_rowadj = PETSC_TRUE; 4388 } 4389 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4390 } 4391 if (pcbddc->dbg_flag) { 4392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4393 } 4394 4395 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4396 if (pcbddc->user_provided_isfordofs) { 4397 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4398 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4399 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4400 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4401 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4402 } 4403 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4404 pcbddc->n_ISForDofs = 0; 4405 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4406 } 4407 } else { 4408 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4409 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4410 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4411 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4412 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4413 } 4414 } 4415 } 4416 4417 /* Setup of Graph */ 4418 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4419 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4420 } 4421 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4422 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4423 } 4424 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4425 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4426 } 4427 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4428 4429 /* attach info on disconnected subdomains if present */ 4430 if (pcbddc->n_local_subs) { 4431 PetscInt *local_subs; 4432 4433 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4434 for (i=0;i<pcbddc->n_local_subs;i++) { 4435 const PetscInt *idxs; 4436 PetscInt nl,j; 4437 4438 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4439 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4440 for (j=0;j<nl;j++) { 4441 local_subs[idxs[j]] = i; 4442 } 4443 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4444 } 4445 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4446 pcbddc->mat_graph->local_subs = local_subs; 4447 } 4448 4449 /* Graph's connected components analysis */ 4450 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4451 4452 /* print some info to stdout */ 4453 if (pcbddc->dbg_flag) { 4454 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4455 } 4456 PetscFunctionReturn(0); 4457 } 4458 4459 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4460 #undef __FUNCT__ 4461 #define __FUNCT__ "PCBDDCSubsetNumbering" 4462 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4463 { 4464 PetscSF sf; 4465 PetscLayout map; 4466 const PetscInt *idxs; 4467 PetscInt *leaf_data,*root_data,*gidxs; 4468 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4469 PetscInt n_n,nlocals,start,first_index; 4470 PetscMPIInt commsize; 4471 PetscBool first_found; 4472 PetscErrorCode ierr; 4473 4474 PetscFunctionBegin; 4475 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4476 if (subset_mult) { 4477 PetscCheckSameComm(subset,1,subset_mult,2); 4478 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4479 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4480 } 4481 /* create workspace layout for computing global indices of subset */ 4482 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4483 lbounds[0] = lbounds[1] = 0; 4484 for (i=0;i<n;i++) { 4485 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4486 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4487 } 4488 lbounds[0] = -lbounds[0]; 4489 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4490 gbounds[0] = -gbounds[0]; 4491 N = gbounds[1] - gbounds[0] + 1; 4492 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4493 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4494 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4495 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4496 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4497 4498 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4499 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4500 if (subset_mult) { 4501 const PetscInt* idxs_mult; 4502 4503 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4504 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4505 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4506 } else { 4507 for (i=0;i<n;i++) leaf_data[i] = 1; 4508 } 4509 /* local size of new subset */ 4510 n_n = 0; 4511 for (i=0;i<n;i++) n_n += leaf_data[i]; 4512 4513 /* global indexes in layout */ 4514 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4515 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4516 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4517 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4518 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4519 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4520 4521 /* reduce from leaves to roots */ 4522 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4523 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4524 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4525 4526 /* count indexes in local part of layout */ 4527 nlocals = 0; 4528 first_index = -1; 4529 first_found = PETSC_FALSE; 4530 for (i=0;i<Nl;i++) { 4531 if (!first_found && root_data[i]) { 4532 first_found = PETSC_TRUE; 4533 first_index = i; 4534 } 4535 nlocals += root_data[i]; 4536 } 4537 4538 /* cumulative of number of indexes and size of subset without holes */ 4539 #if defined(PETSC_HAVE_MPI_EXSCAN) 4540 start = 0; 4541 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4542 #else 4543 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4544 start = start-nlocals; 4545 #endif 4546 4547 if (N_n) { /* compute total size of new subset if requested */ 4548 *N_n = start + nlocals; 4549 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4550 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4551 } 4552 4553 /* adapt root data with cumulative */ 4554 if (first_found) { 4555 PetscInt old_index; 4556 4557 root_data[first_index] += start; 4558 old_index = first_index; 4559 for (i=first_index+1;i<Nl;i++) { 4560 if (root_data[i]) { 4561 root_data[i] += root_data[old_index]; 4562 old_index = i; 4563 } 4564 } 4565 } 4566 4567 /* from roots to leaves */ 4568 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4569 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4570 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4571 4572 /* create new IS with global indexes without holes */ 4573 if (subset_mult) { 4574 const PetscInt* idxs_mult; 4575 PetscInt cum; 4576 4577 cum = 0; 4578 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4579 for (i=0;i<n;i++) { 4580 PetscInt j; 4581 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4582 } 4583 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4584 } else { 4585 for (i=0;i<n;i++) { 4586 gidxs[i] = leaf_data[i]-1; 4587 } 4588 } 4589 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4590 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4596 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4597 { 4598 PetscInt i,j; 4599 PetscScalar *alphas; 4600 PetscErrorCode ierr; 4601 4602 PetscFunctionBegin; 4603 /* this implements stabilized Gram-Schmidt */ 4604 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4605 for (i=0;i<n;i++) { 4606 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4607 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4608 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4609 } 4610 ierr = PetscFree(alphas);CHKERRQ(ierr); 4611 PetscFunctionReturn(0); 4612 } 4613 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "MatISGetSubassemblingPattern" 4616 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4617 { 4618 IS ranks_send_to; 4619 PetscInt n_neighs,*neighs,*n_shared,**shared; 4620 PetscMPIInt size,rank,color; 4621 PetscInt *xadj,*adjncy; 4622 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4623 PetscInt i,local_size,threshold=0; 4624 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4625 PetscSubcomm subcomm; 4626 PetscErrorCode ierr; 4627 4628 PetscFunctionBegin; 4629 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4630 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4631 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4632 4633 /* Get info on mapping */ 4634 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4635 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4636 4637 /* build local CSR graph of subdomains' connectivity */ 4638 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4639 xadj[0] = 0; 4640 xadj[1] = PetscMax(n_neighs-1,0); 4641 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4642 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4643 4644 if (threshold) { 4645 PetscInt xadj_count = 0; 4646 for (i=1;i<n_neighs;i++) { 4647 if (n_shared[i] > threshold) { 4648 adjncy[xadj_count] = neighs[i]; 4649 adjncy_wgt[xadj_count] = n_shared[i]; 4650 xadj_count++; 4651 } 4652 } 4653 xadj[1] = xadj_count; 4654 } else { 4655 if (xadj[1]) { 4656 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4657 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4658 } 4659 } 4660 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4661 if (use_square) { 4662 for (i=0;i<xadj[1];i++) { 4663 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4664 } 4665 } 4666 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4667 4668 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4669 4670 /* 4671 Restrict work on active processes only. 4672 */ 4673 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4674 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4675 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4676 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4677 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4678 if (color) { 4679 ierr = PetscFree(xadj);CHKERRQ(ierr); 4680 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4681 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4682 } else { 4683 Mat subdomain_adj; 4684 IS new_ranks,new_ranks_contig; 4685 MatPartitioning partitioner; 4686 PetscInt prank,rstart=0,rend=0; 4687 PetscInt *is_indices,*oldranks; 4688 PetscBool aggregate; 4689 4690 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4691 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4692 prank = rank; 4693 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4694 /* 4695 for (i=0;i<size;i++) { 4696 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4697 } 4698 */ 4699 for (i=0;i<xadj[1];i++) { 4700 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4701 } 4702 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4703 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4704 if (aggregate) { 4705 PetscInt lrows,row,ncols,*cols; 4706 PetscMPIInt nrank; 4707 PetscScalar *vals; 4708 4709 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4710 lrows = 0; 4711 if (nrank<redprocs) { 4712 lrows = size/redprocs; 4713 if (nrank<size%redprocs) lrows++; 4714 } 4715 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4716 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4717 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4718 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4719 row = nrank; 4720 ncols = xadj[1]-xadj[0]; 4721 cols = adjncy; 4722 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4723 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4724 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4725 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4726 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4727 ierr = PetscFree(xadj);CHKERRQ(ierr); 4728 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4729 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4730 ierr = PetscFree(vals);CHKERRQ(ierr); 4731 } else { 4732 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4733 } 4734 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4735 4736 /* Partition */ 4737 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4738 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4739 if (use_vwgt) { 4740 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4741 v_wgt[0] = local_size; 4742 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4743 } 4744 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4745 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4746 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4747 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4748 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4749 4750 /* renumber new_ranks to avoid "holes" in new set of processors */ 4751 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4752 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4753 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4754 if (!redprocs) { 4755 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4756 } else { 4757 PetscInt idxs[1]; 4758 PetscMPIInt tag; 4759 MPI_Request *reqs; 4760 4761 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4762 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4763 for (i=rstart;i<rend;i++) { 4764 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4765 } 4766 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4767 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4768 ierr = PetscFree(reqs);CHKERRQ(ierr); 4769 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4770 } 4771 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4772 /* clean up */ 4773 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4774 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4775 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4776 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4777 } 4778 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4779 4780 /* assemble parallel IS for sends */ 4781 i = 1; 4782 if (color) i=0; 4783 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4784 /* get back IS */ 4785 *is_sends = ranks_send_to; 4786 PetscFunctionReturn(0); 4787 } 4788 4789 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4790 4791 #undef __FUNCT__ 4792 #define __FUNCT__ "MatISSubassemble" 4793 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4794 { 4795 Mat local_mat; 4796 IS is_sends_internal; 4797 PetscInt rows,cols,new_local_rows; 4798 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4799 PetscBool ismatis,isdense,newisdense,destroy_mat; 4800 ISLocalToGlobalMapping l2gmap; 4801 PetscInt* l2gmap_indices; 4802 const PetscInt* is_indices; 4803 MatType new_local_type; 4804 /* buffers */ 4805 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4806 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4807 PetscInt *recv_buffer_idxs_local; 4808 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4809 /* MPI */ 4810 MPI_Comm comm,comm_n; 4811 PetscSubcomm subcomm; 4812 PetscMPIInt n_sends,n_recvs,commsize; 4813 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4814 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4815 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4816 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4817 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4818 PetscErrorCode ierr; 4819 4820 PetscFunctionBegin; 4821 /* TODO: add missing checks */ 4822 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4823 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4824 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4825 PetscValidLogicalCollectiveInt(mat,nis,7); 4826 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4827 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4828 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4829 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4830 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4831 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4832 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4833 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4834 PetscInt mrows,mcols,mnrows,mncols; 4835 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4836 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4837 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4838 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4839 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4840 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4841 } 4842 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4843 PetscValidLogicalCollectiveInt(mat,bs,0); 4844 /* prepare IS for sending if not provided */ 4845 if (!is_sends) { 4846 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4847 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4848 } else { 4849 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4850 is_sends_internal = is_sends; 4851 } 4852 4853 /* get comm */ 4854 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4855 4856 /* compute number of sends */ 4857 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4858 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4859 4860 /* compute number of receives */ 4861 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4862 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4863 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4864 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4865 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4866 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4867 ierr = PetscFree(iflags);CHKERRQ(ierr); 4868 4869 /* restrict comm if requested */ 4870 subcomm = 0; 4871 destroy_mat = PETSC_FALSE; 4872 if (restrict_comm) { 4873 PetscMPIInt color,subcommsize; 4874 4875 color = 0; 4876 if (restrict_full) { 4877 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4878 } else { 4879 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4880 } 4881 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4882 subcommsize = commsize - subcommsize; 4883 /* check if reuse has been requested */ 4884 if (reuse == MAT_REUSE_MATRIX) { 4885 if (*mat_n) { 4886 PetscMPIInt subcommsize2; 4887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4888 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4889 comm_n = PetscObjectComm((PetscObject)*mat_n); 4890 } else { 4891 comm_n = PETSC_COMM_SELF; 4892 } 4893 } else { /* MAT_INITIAL_MATRIX */ 4894 PetscMPIInt rank; 4895 4896 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4897 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4898 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4899 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4900 comm_n = PetscSubcommChild(subcomm); 4901 } 4902 /* flag to destroy *mat_n if not significative */ 4903 if (color) destroy_mat = PETSC_TRUE; 4904 } else { 4905 comm_n = comm; 4906 } 4907 4908 /* prepare send/receive buffers */ 4909 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4910 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4911 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4912 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4913 if (nis) { 4914 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4915 } 4916 4917 /* Get data from local matrices */ 4918 if (!isdense) { 4919 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4920 /* TODO: See below some guidelines on how to prepare the local buffers */ 4921 /* 4922 send_buffer_vals should contain the raw values of the local matrix 4923 send_buffer_idxs should contain: 4924 - MatType_PRIVATE type 4925 - PetscInt size_of_l2gmap 4926 - PetscInt global_row_indices[size_of_l2gmap] 4927 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4928 */ 4929 } else { 4930 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4931 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4932 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4933 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4934 send_buffer_idxs[1] = i; 4935 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4936 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4937 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4938 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4939 for (i=0;i<n_sends;i++) { 4940 ilengths_vals[is_indices[i]] = len*len; 4941 ilengths_idxs[is_indices[i]] = len+2; 4942 } 4943 } 4944 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4945 /* additional is (if any) */ 4946 if (nis) { 4947 PetscMPIInt psum; 4948 PetscInt j; 4949 for (j=0,psum=0;j<nis;j++) { 4950 PetscInt plen; 4951 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4952 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4953 psum += len+1; /* indices + lenght */ 4954 } 4955 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4956 for (j=0,psum=0;j<nis;j++) { 4957 PetscInt plen; 4958 const PetscInt *is_array_idxs; 4959 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4960 send_buffer_idxs_is[psum] = plen; 4961 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4962 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4963 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4964 psum += plen+1; /* indices + lenght */ 4965 } 4966 for (i=0;i<n_sends;i++) { 4967 ilengths_idxs_is[is_indices[i]] = psum; 4968 } 4969 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4970 } 4971 4972 buf_size_idxs = 0; 4973 buf_size_vals = 0; 4974 buf_size_idxs_is = 0; 4975 for (i=0;i<n_recvs;i++) { 4976 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4977 buf_size_vals += (PetscInt)olengths_vals[i]; 4978 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4979 } 4980 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4981 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4982 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4983 4984 /* get new tags for clean communications */ 4985 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4986 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4987 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4988 4989 /* allocate for requests */ 4990 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4991 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4992 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4993 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4994 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4995 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4996 4997 /* communications */ 4998 ptr_idxs = recv_buffer_idxs; 4999 ptr_vals = recv_buffer_vals; 5000 ptr_idxs_is = recv_buffer_idxs_is; 5001 for (i=0;i<n_recvs;i++) { 5002 source_dest = onodes[i]; 5003 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5004 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5005 ptr_idxs += olengths_idxs[i]; 5006 ptr_vals += olengths_vals[i]; 5007 if (nis) { 5008 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5009 ptr_idxs_is += olengths_idxs_is[i]; 5010 } 5011 } 5012 for (i=0;i<n_sends;i++) { 5013 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5014 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5015 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5016 if (nis) { 5017 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5018 } 5019 } 5020 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5021 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5022 5023 /* assemble new l2g map */ 5024 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5025 ptr_idxs = recv_buffer_idxs; 5026 new_local_rows = 0; 5027 for (i=0;i<n_recvs;i++) { 5028 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5029 ptr_idxs += olengths_idxs[i]; 5030 } 5031 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5032 ptr_idxs = recv_buffer_idxs; 5033 new_local_rows = 0; 5034 for (i=0;i<n_recvs;i++) { 5035 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5036 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5037 ptr_idxs += olengths_idxs[i]; 5038 } 5039 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5040 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5041 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5042 5043 /* infer new local matrix type from received local matrices type */ 5044 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5045 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5046 if (n_recvs) { 5047 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5048 ptr_idxs = recv_buffer_idxs; 5049 for (i=0;i<n_recvs;i++) { 5050 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5051 new_local_type_private = MATAIJ_PRIVATE; 5052 break; 5053 } 5054 ptr_idxs += olengths_idxs[i]; 5055 } 5056 switch (new_local_type_private) { 5057 case MATDENSE_PRIVATE: 5058 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5059 new_local_type = MATSEQAIJ; 5060 bs = 1; 5061 } else { /* if I receive only 1 dense matrix */ 5062 new_local_type = MATSEQDENSE; 5063 bs = 1; 5064 } 5065 break; 5066 case MATAIJ_PRIVATE: 5067 new_local_type = MATSEQAIJ; 5068 bs = 1; 5069 break; 5070 case MATBAIJ_PRIVATE: 5071 new_local_type = MATSEQBAIJ; 5072 break; 5073 case MATSBAIJ_PRIVATE: 5074 new_local_type = MATSEQSBAIJ; 5075 break; 5076 default: 5077 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5078 break; 5079 } 5080 } else { /* by default, new_local_type is seqdense */ 5081 new_local_type = MATSEQDENSE; 5082 bs = 1; 5083 } 5084 5085 /* create MATIS object if needed */ 5086 if (reuse == MAT_INITIAL_MATRIX) { 5087 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5088 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5089 } else { 5090 /* it also destroys the local matrices */ 5091 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5092 } 5093 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5094 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5095 5096 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5097 5098 /* Global to local map of received indices */ 5099 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5100 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5101 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5102 5103 /* restore attributes -> type of incoming data and its size */ 5104 buf_size_idxs = 0; 5105 for (i=0;i<n_recvs;i++) { 5106 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5107 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5108 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5109 } 5110 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5111 5112 /* set preallocation */ 5113 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5114 if (!newisdense) { 5115 PetscInt *new_local_nnz=0; 5116 5117 ptr_vals = recv_buffer_vals; 5118 ptr_idxs = recv_buffer_idxs_local; 5119 if (n_recvs) { 5120 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5121 } 5122 for (i=0;i<n_recvs;i++) { 5123 PetscInt j; 5124 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5125 for (j=0;j<*(ptr_idxs+1);j++) { 5126 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5127 } 5128 } else { 5129 /* TODO */ 5130 } 5131 ptr_idxs += olengths_idxs[i]; 5132 } 5133 if (new_local_nnz) { 5134 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5135 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5136 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5137 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5138 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5139 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5140 } else { 5141 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5142 } 5143 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5144 } else { 5145 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5146 } 5147 5148 /* set values */ 5149 ptr_vals = recv_buffer_vals; 5150 ptr_idxs = recv_buffer_idxs_local; 5151 for (i=0;i<n_recvs;i++) { 5152 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5153 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5154 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5155 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5156 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5157 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5158 } else { 5159 /* TODO */ 5160 } 5161 ptr_idxs += olengths_idxs[i]; 5162 ptr_vals += olengths_vals[i]; 5163 } 5164 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5165 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5166 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5167 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5168 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5169 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5170 5171 #if 0 5172 if (!restrict_comm) { /* check */ 5173 Vec lvec,rvec; 5174 PetscReal infty_error; 5175 5176 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5177 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5178 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5179 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5180 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5181 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5182 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5183 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5184 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5185 } 5186 #endif 5187 5188 /* assemble new additional is (if any) */ 5189 if (nis) { 5190 PetscInt **temp_idxs,*count_is,j,psum; 5191 5192 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5193 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5194 ptr_idxs = recv_buffer_idxs_is; 5195 psum = 0; 5196 for (i=0;i<n_recvs;i++) { 5197 for (j=0;j<nis;j++) { 5198 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5199 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5200 psum += plen; 5201 ptr_idxs += plen+1; /* shift pointer to received data */ 5202 } 5203 } 5204 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5205 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5206 for (i=1;i<nis;i++) { 5207 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5208 } 5209 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5210 ptr_idxs = recv_buffer_idxs_is; 5211 for (i=0;i<n_recvs;i++) { 5212 for (j=0;j<nis;j++) { 5213 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5214 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5215 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5216 ptr_idxs += plen+1; /* shift pointer to received data */ 5217 } 5218 } 5219 for (i=0;i<nis;i++) { 5220 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5221 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5222 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5223 } 5224 ierr = PetscFree(count_is);CHKERRQ(ierr); 5225 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5226 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5227 } 5228 /* free workspace */ 5229 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5230 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5231 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5232 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5233 if (isdense) { 5234 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5235 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5236 } else { 5237 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5238 } 5239 if (nis) { 5240 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5241 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5242 } 5243 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5244 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5245 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5246 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5247 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5248 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5249 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5250 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5251 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5252 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5253 ierr = PetscFree(onodes);CHKERRQ(ierr); 5254 if (nis) { 5255 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5256 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5257 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5258 } 5259 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5260 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5261 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5262 for (i=0;i<nis;i++) { 5263 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5264 } 5265 *mat_n = NULL; 5266 } 5267 PetscFunctionReturn(0); 5268 } 5269 5270 /* temporary hack into ksp private data structure */ 5271 #include <petsc/private/kspimpl.h> 5272 5273 #undef __FUNCT__ 5274 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5275 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5276 { 5277 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5278 PC_IS *pcis = (PC_IS*)pc->data; 5279 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5280 MatNullSpace CoarseNullSpace=NULL; 5281 ISLocalToGlobalMapping coarse_islg; 5282 IS coarse_is,*isarray; 5283 PetscInt i,im_active=-1,active_procs=-1; 5284 PetscInt nis,nisdofs,nisneu,nisvert; 5285 PC pc_temp; 5286 PCType coarse_pc_type; 5287 KSPType coarse_ksp_type; 5288 PetscBool multilevel_requested,multilevel_allowed; 5289 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5290 Mat t_coarse_mat_is; 5291 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5292 PetscMPIInt all_procs; 5293 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5294 PetscBool compute_vecs = PETSC_FALSE; 5295 PetscScalar *array; 5296 PetscErrorCode ierr; 5297 5298 PetscFunctionBegin; 5299 /* Assign global numbering to coarse dofs */ 5300 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5301 PetscInt ocoarse_size; 5302 compute_vecs = PETSC_TRUE; 5303 ocoarse_size = pcbddc->coarse_size; 5304 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5305 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5306 /* see if we can avoid some work */ 5307 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5308 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5309 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5310 PC pc; 5311 PetscBool isbddc; 5312 5313 /* temporary workaround since PCBDDC does not have a reset method so far */ 5314 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5315 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5316 if (isbddc) { 5317 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5318 } else { 5319 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5320 } 5321 coarse_reuse = PETSC_FALSE; 5322 } else { /* we can safely reuse already computed coarse matrix */ 5323 coarse_reuse = PETSC_TRUE; 5324 } 5325 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5326 coarse_reuse = PETSC_FALSE; 5327 } 5328 /* reset any subassembling information */ 5329 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5330 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5331 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5332 coarse_reuse = PETSC_TRUE; 5333 } 5334 5335 /* count "active" (i.e. with positive local size) and "void" processes */ 5336 im_active = !!(pcis->n); 5337 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5338 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5339 void_procs = all_procs-active_procs; 5340 csin_type_simple = PETSC_TRUE; 5341 redist = PETSC_FALSE; 5342 if (pcbddc->current_level && void_procs) { 5343 csin_ml = PETSC_TRUE; 5344 ncoarse_ml = void_procs; 5345 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5346 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5347 csin_ds = PETSC_TRUE; 5348 ncoarse_ds = pcbddc->redistribute_coarse; 5349 redist = PETSC_TRUE; 5350 } else { 5351 csin_ds = PETSC_TRUE; 5352 ncoarse_ds = active_procs; 5353 redist = PETSC_TRUE; 5354 } 5355 } else { 5356 csin_ml = PETSC_FALSE; 5357 ncoarse_ml = all_procs; 5358 if (void_procs) { 5359 csin_ds = PETSC_TRUE; 5360 ncoarse_ds = void_procs; 5361 csin_type_simple = PETSC_FALSE; 5362 } else { 5363 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5364 csin_ds = PETSC_TRUE; 5365 ncoarse_ds = pcbddc->redistribute_coarse; 5366 redist = PETSC_TRUE; 5367 } else { 5368 csin_ds = PETSC_FALSE; 5369 ncoarse_ds = all_procs; 5370 } 5371 } 5372 } 5373 5374 /* 5375 test if we can go multilevel: three conditions must be satisfied: 5376 - we have not exceeded the number of levels requested 5377 - we can actually subassemble the active processes 5378 - we can find a suitable number of MPI processes where we can place the subassembled problem 5379 */ 5380 multilevel_allowed = PETSC_FALSE; 5381 multilevel_requested = PETSC_FALSE; 5382 if (pcbddc->current_level < pcbddc->max_levels) { 5383 multilevel_requested = PETSC_TRUE; 5384 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5385 multilevel_allowed = PETSC_FALSE; 5386 } else { 5387 multilevel_allowed = PETSC_TRUE; 5388 } 5389 } 5390 /* determine number of process partecipating to coarse solver */ 5391 if (multilevel_allowed) { 5392 ncoarse = ncoarse_ml; 5393 csin = csin_ml; 5394 redist = PETSC_FALSE; 5395 } else { 5396 ncoarse = ncoarse_ds; 5397 csin = csin_ds; 5398 } 5399 5400 /* creates temporary l2gmap and IS for coarse indexes */ 5401 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5402 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5403 5404 /* creates temporary MATIS object for coarse matrix */ 5405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5406 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5407 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5408 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5409 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5410 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5411 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5412 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5413 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5414 5415 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5416 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5417 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5418 const PetscInt *idxs; 5419 ISLocalToGlobalMapping tmap; 5420 5421 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5422 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5423 /* allocate space for temporary storage */ 5424 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5425 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5426 /* allocate for IS array */ 5427 nisdofs = pcbddc->n_ISForDofsLocal; 5428 nisneu = !!pcbddc->NeumannBoundariesLocal; 5429 nisvert = 0; 5430 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5431 nisvert = 1; 5432 } 5433 nis = nisdofs + nisneu + nisvert; 5434 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5435 /* dofs splitting */ 5436 for (i=0;i<nisdofs;i++) { 5437 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5438 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5439 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5440 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5441 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5442 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5443 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5444 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5445 } 5446 /* neumann boundaries */ 5447 if (pcbddc->NeumannBoundariesLocal) { 5448 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5449 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5450 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5451 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5452 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5453 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5454 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5455 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5456 } 5457 /* primal vertices (benign) */ 5458 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5459 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5460 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5461 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5462 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5463 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5464 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5465 } 5466 /* free memory */ 5467 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5468 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5469 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5470 } else { 5471 nis = 0; 5472 nisdofs = 0; 5473 nisneu = 0; 5474 nisvert = 0; 5475 isarray = NULL; 5476 } 5477 /* destroy no longer needed map */ 5478 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5479 5480 /* restrict on coarse candidates (if needed) */ 5481 coarse_mat_is = NULL; 5482 if (csin) { 5483 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5484 if (redist) { 5485 PetscMPIInt rank; 5486 PetscInt spc,n_spc_p1,dest[1],destsize; 5487 5488 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5489 spc = active_procs/ncoarse; 5490 n_spc_p1 = active_procs%ncoarse; 5491 if (im_active) { 5492 destsize = 1; 5493 if (rank > n_spc_p1*(spc+1)-1) { 5494 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5495 } else { 5496 dest[0] = rank/(spc+1); 5497 } 5498 } else { 5499 destsize = 0; 5500 } 5501 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5502 } else if (csin_type_simple) { 5503 PetscMPIInt rank; 5504 PetscInt issize,isidx; 5505 5506 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5507 if (im_active) { 5508 issize = 1; 5509 isidx = (PetscInt)rank; 5510 } else { 5511 issize = 0; 5512 isidx = -1; 5513 } 5514 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5515 } else { /* get a suitable subassembling pattern from MATIS code */ 5516 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5517 } 5518 5519 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5520 if (!redist || ncoarse <= void_procs) { 5521 PetscInt ncoarse_cand,tissize,*nisindices; 5522 PetscInt *coarse_candidates; 5523 const PetscInt* tisindices; 5524 5525 /* get coarse candidates' ranks in pc communicator */ 5526 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5527 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5528 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5529 if (!coarse_candidates[i]) { 5530 coarse_candidates[ncoarse_cand++]=i; 5531 } 5532 } 5533 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5534 5535 5536 if (pcbddc->dbg_flag) { 5537 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5539 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5541 for (i=0;i<ncoarse_cand;i++) { 5542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5543 } 5544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5546 } 5547 /* shift the pattern on coarse candidates */ 5548 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5549 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5550 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5551 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5552 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5553 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5554 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5555 } 5556 if (pcbddc->dbg_flag) { 5557 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5558 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5559 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5560 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5561 } 5562 } 5563 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5564 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5565 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5566 } else { /* this is the last level, so use just receiving processes in subcomm */ 5567 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5568 } 5569 } else { 5570 if (pcbddc->dbg_flag) { 5571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5573 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5574 } 5575 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5576 coarse_mat_is = t_coarse_mat_is; 5577 } 5578 5579 /* create local to global scatters for coarse problem */ 5580 if (compute_vecs) { 5581 PetscInt lrows; 5582 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5583 if (coarse_mat_is) { 5584 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5585 } else { 5586 lrows = 0; 5587 } 5588 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5589 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5590 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5591 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5592 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5593 } 5594 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5595 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5596 5597 /* set defaults for coarse KSP and PC */ 5598 if (multilevel_allowed) { 5599 coarse_ksp_type = KSPRICHARDSON; 5600 coarse_pc_type = PCBDDC; 5601 } else { 5602 coarse_ksp_type = KSPPREONLY; 5603 coarse_pc_type = PCREDUNDANT; 5604 } 5605 5606 /* print some info if requested */ 5607 if (pcbddc->dbg_flag) { 5608 if (!multilevel_allowed) { 5609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5610 if (multilevel_requested) { 5611 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5612 } else if (pcbddc->max_levels) { 5613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5614 } 5615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5616 } 5617 } 5618 5619 /* create the coarse KSP object only once with defaults */ 5620 if (coarse_mat_is) { 5621 MatReuse coarse_mat_reuse; 5622 PetscViewer dbg_viewer = NULL; 5623 if (pcbddc->dbg_flag) { 5624 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5625 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5626 } 5627 if (!pcbddc->coarse_ksp) { 5628 char prefix[256],str_level[16]; 5629 size_t len; 5630 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5631 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5632 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5633 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5634 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5635 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5636 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5637 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5638 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5639 /* prefix */ 5640 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5641 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5642 if (!pcbddc->current_level) { 5643 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5644 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5645 } else { 5646 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5647 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5648 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5649 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5650 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5651 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5652 } 5653 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5654 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5655 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5656 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5657 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5658 /* allow user customization */ 5659 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5660 } 5661 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5662 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5663 if (nisdofs) { 5664 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5665 for (i=0;i<nisdofs;i++) { 5666 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5667 } 5668 } 5669 if (nisneu) { 5670 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5671 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5672 } 5673 if (nisvert) { 5674 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5675 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5676 } 5677 5678 /* get some info after set from options */ 5679 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5680 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5681 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5682 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5683 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5684 isbddc = PETSC_FALSE; 5685 } 5686 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5687 if (isredundant) { 5688 KSP inner_ksp; 5689 PC inner_pc; 5690 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5691 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5692 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5693 } 5694 5695 /* assemble coarse matrix */ 5696 if (coarse_reuse) { 5697 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5698 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5699 coarse_mat_reuse = MAT_REUSE_MATRIX; 5700 } else { 5701 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5702 } 5703 if (isbddc || isnn) { 5704 if (isbddc) { /* currently there are no APIs for these options */ 5705 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5706 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5707 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5708 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5709 if (pcbddc_coarse->benign_compute_nonetflux) { 5710 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5711 } 5712 } 5713 if (pcbddc->coarsening_ratio > 1) { 5714 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5715 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5716 if (pcbddc->dbg_flag) { 5717 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5718 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5719 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5720 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5721 } 5722 } 5723 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5724 } else { 5725 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5726 coarse_mat = coarse_mat_is; 5727 } 5728 } else { 5729 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5730 } 5731 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5732 5733 /* propagate symmetry info of coarse matrix */ 5734 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5735 if (pc->pmat->symmetric_set) { 5736 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5737 } 5738 if (pc->pmat->hermitian_set) { 5739 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5740 } 5741 if (pc->pmat->spd_set) { 5742 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5743 } 5744 /* set operators */ 5745 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5746 if (pcbddc->dbg_flag) { 5747 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5748 } 5749 } else { /* processes non partecipating to coarse solver (if any) */ 5750 coarse_mat = 0; 5751 } 5752 ierr = PetscFree(isarray);CHKERRQ(ierr); 5753 #if 0 5754 { 5755 PetscViewer viewer; 5756 char filename[256]; 5757 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5758 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5759 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5760 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5761 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5762 } 5763 #endif 5764 5765 /* Compute coarse null space (special handling by BDDC only) */ 5766 #if 0 5767 if (pcbddc->NullSpace) { 5768 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5769 } 5770 #endif 5771 /* hack */ 5772 if (pcbddc->coarse_ksp) { 5773 Vec crhs,csol; 5774 5775 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5776 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5777 if (!csol) { 5778 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5779 } 5780 if (!crhs) { 5781 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5782 } 5783 } 5784 5785 /* compute null space for coarse solver if the benign trick has been requested */ 5786 if (pcbddc->benign_null) { 5787 5788 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5789 for (i=0;i<pcbddc->benign_n;i++) { 5790 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5791 } 5792 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5793 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5794 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5795 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5796 if (coarse_mat) { 5797 Vec nullv; 5798 PetscScalar *array,*array2; 5799 PetscInt nl; 5800 5801 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5802 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5803 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5804 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5805 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5806 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5807 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5808 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5809 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5810 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5811 } 5812 } 5813 5814 if (pcbddc->coarse_ksp) { 5815 PetscBool ispreonly; 5816 5817 if (CoarseNullSpace) { 5818 PetscBool isnull; 5819 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5820 if (0) { 5821 if (isbddc && !pcbddc->benign_saddle_point) { 5822 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5823 } else { 5824 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5825 } 5826 } else { 5827 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5828 } 5829 } 5830 /* setup coarse ksp */ 5831 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5832 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5833 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5834 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5835 KSP check_ksp; 5836 KSPType check_ksp_type; 5837 PC check_pc; 5838 Vec check_vec,coarse_vec; 5839 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5840 PetscInt its; 5841 PetscBool compute_eigs; 5842 PetscReal *eigs_r,*eigs_c; 5843 PetscInt neigs; 5844 const char *prefix; 5845 5846 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5847 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5848 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5849 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5850 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5851 if (ispreonly) { 5852 check_ksp_type = KSPPREONLY; 5853 compute_eigs = PETSC_FALSE; 5854 } else { 5855 check_ksp_type = KSPGMRES; 5856 compute_eigs = PETSC_TRUE; 5857 } 5858 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5859 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5860 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5861 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5862 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5863 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5864 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5865 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5866 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5867 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5868 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5869 /* create random vec */ 5870 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5871 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5872 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5873 if (CoarseNullSpace) { 5874 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5875 } 5876 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5877 /* solve coarse problem */ 5878 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5879 if (CoarseNullSpace) { 5880 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5881 } 5882 /* set eigenvalue estimation if preonly has not been requested */ 5883 if (compute_eigs) { 5884 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5885 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5886 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5887 lambda_max = eigs_r[neigs-1]; 5888 lambda_min = eigs_r[0]; 5889 if (pcbddc->use_coarse_estimates) { 5890 if (lambda_max>lambda_min) { 5891 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5892 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5893 } 5894 } 5895 } 5896 5897 /* check coarse problem residual error */ 5898 if (pcbddc->dbg_flag) { 5899 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5900 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5901 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5902 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5903 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5904 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5905 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5906 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5907 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5908 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5909 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5910 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5911 if (CoarseNullSpace) { 5912 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5913 } 5914 if (compute_eigs) { 5915 PetscReal lambda_max_s,lambda_min_s; 5916 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5917 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5918 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5919 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5920 for (i=0;i<neigs;i++) { 5921 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5922 } 5923 } 5924 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5925 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5926 } 5927 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5928 if (compute_eigs) { 5929 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5930 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5931 } 5932 } 5933 } 5934 /* print additional info */ 5935 if (pcbddc->dbg_flag) { 5936 /* waits until all processes reaches this point */ 5937 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5939 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5940 } 5941 5942 /* free memory */ 5943 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5944 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5945 PetscFunctionReturn(0); 5946 } 5947 5948 #undef __FUNCT__ 5949 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5950 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5951 { 5952 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5953 PC_IS* pcis = (PC_IS*)pc->data; 5954 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5955 IS subset,subset_mult,subset_n; 5956 PetscInt local_size,coarse_size=0; 5957 PetscInt *local_primal_indices=NULL; 5958 const PetscInt *t_local_primal_indices; 5959 PetscErrorCode ierr; 5960 5961 PetscFunctionBegin; 5962 /* Compute global number of coarse dofs */ 5963 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5964 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5965 } 5966 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5967 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5968 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5969 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5970 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5971 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5972 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5973 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5974 if (local_size != pcbddc->local_primal_size) { 5975 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5976 } 5977 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5978 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5979 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5980 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5981 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5982 5983 /* check numbering */ 5984 if (pcbddc->dbg_flag) { 5985 PetscScalar coarsesum,*array,*array2; 5986 PetscInt i; 5987 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5988 5989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5992 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5993 /* counter */ 5994 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5995 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5996 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5997 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5998 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5999 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6000 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6001 for (i=0;i<pcbddc->local_primal_size;i++) { 6002 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6003 } 6004 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6005 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6006 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6007 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6008 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6009 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6010 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6011 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6012 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6013 for (i=0;i<pcis->n;i++) { 6014 if (array[i] != 0.0 && array[i] != array2[i]) { 6015 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6016 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6017 set_error = PETSC_TRUE; 6018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6019 } 6020 } 6021 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6022 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6024 for (i=0;i<pcis->n;i++) { 6025 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6026 } 6027 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6028 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6029 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6030 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6031 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6033 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6034 PetscInt *gidxs; 6035 6036 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6037 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6038 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6041 for (i=0;i<pcbddc->local_primal_size;i++) { 6042 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6043 } 6044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6045 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6046 } 6047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6048 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6049 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6050 } 6051 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6052 /* get back data */ 6053 *coarse_size_n = coarse_size; 6054 *local_primal_indices_n = local_primal_indices; 6055 PetscFunctionReturn(0); 6056 } 6057 6058 #undef __FUNCT__ 6059 #define __FUNCT__ "PCBDDCGlobalToLocal" 6060 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6061 { 6062 IS localis_t; 6063 PetscInt i,lsize,*idxs,n; 6064 PetscScalar *vals; 6065 PetscErrorCode ierr; 6066 6067 PetscFunctionBegin; 6068 /* get indices in local ordering exploiting local to global map */ 6069 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6070 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6071 for (i=0;i<lsize;i++) vals[i] = 1.0; 6072 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6073 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6074 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6075 if (idxs) { /* multilevel guard */ 6076 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6077 } 6078 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6079 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6080 ierr = PetscFree(vals);CHKERRQ(ierr); 6081 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6082 /* now compute set in local ordering */ 6083 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6084 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6085 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6086 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6087 for (i=0,lsize=0;i<n;i++) { 6088 if (PetscRealPart(vals[i]) > 0.5) { 6089 lsize++; 6090 } 6091 } 6092 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6093 for (i=0,lsize=0;i<n;i++) { 6094 if (PetscRealPart(vals[i]) > 0.5) { 6095 idxs[lsize++] = i; 6096 } 6097 } 6098 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6099 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6100 *localis = localis_t; 6101 PetscFunctionReturn(0); 6102 } 6103 6104 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6105 #undef __FUNCT__ 6106 #define __FUNCT__ "PCBDDCMatMult_Private" 6107 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6108 { 6109 PCBDDCChange_ctx change_ctx; 6110 PetscErrorCode ierr; 6111 6112 PetscFunctionBegin; 6113 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6114 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6115 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6116 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6117 PetscFunctionReturn(0); 6118 } 6119 6120 #undef __FUNCT__ 6121 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6122 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6123 { 6124 PCBDDCChange_ctx change_ctx; 6125 PetscErrorCode ierr; 6126 6127 PetscFunctionBegin; 6128 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6129 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6130 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6131 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6132 PetscFunctionReturn(0); 6133 } 6134 6135 #undef __FUNCT__ 6136 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6137 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6138 { 6139 PC_IS *pcis=(PC_IS*)pc->data; 6140 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6141 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6142 Mat S_j; 6143 PetscInt *used_xadj,*used_adjncy; 6144 PetscBool free_used_adj; 6145 PetscErrorCode ierr; 6146 6147 PetscFunctionBegin; 6148 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6149 free_used_adj = PETSC_FALSE; 6150 if (pcbddc->sub_schurs_layers == -1) { 6151 used_xadj = NULL; 6152 used_adjncy = NULL; 6153 } else { 6154 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6155 used_xadj = pcbddc->mat_graph->xadj; 6156 used_adjncy = pcbddc->mat_graph->adjncy; 6157 } else if (pcbddc->computed_rowadj) { 6158 used_xadj = pcbddc->mat_graph->xadj; 6159 used_adjncy = pcbddc->mat_graph->adjncy; 6160 } else { 6161 PetscBool flg_row=PETSC_FALSE; 6162 const PetscInt *xadj,*adjncy; 6163 PetscInt nvtxs; 6164 6165 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6166 if (flg_row) { 6167 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6168 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6169 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6170 free_used_adj = PETSC_TRUE; 6171 } else { 6172 pcbddc->sub_schurs_layers = -1; 6173 used_xadj = NULL; 6174 used_adjncy = NULL; 6175 } 6176 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6177 } 6178 } 6179 6180 /* setup sub_schurs data */ 6181 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6182 if (!sub_schurs->schur_explicit) { 6183 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6184 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6185 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6186 } else { 6187 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6188 PetscBool isseqaij; 6189 PetscInt benign_n; 6190 Mat change = NULL; 6191 IS change_primal = NULL; 6192 6193 if (!pcbddc->use_vertices && reuse_solvers) { 6194 PetscInt n_vertices; 6195 6196 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6197 reuse_solvers = (PetscBool)!n_vertices; 6198 } 6199 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6200 if (!isseqaij) { 6201 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6202 if (matis->A == pcbddc->local_mat) { 6203 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6204 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6205 } else { 6206 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6207 } 6208 } 6209 if (!pcbddc->benign_change_explicit) { 6210 benign_n = pcbddc->benign_n; 6211 } else { 6212 benign_n = 0; 6213 } 6214 /* If the user defines additional constraints, we import them here. 6215 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6216 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6217 PC_IS *pcisf; 6218 PC_BDDC *pcbddcf; 6219 PC pcf; 6220 6221 if (pcbddc->sub_schurs_rebuild) { 6222 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6223 } 6224 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6225 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6226 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6227 /* hacks */ 6228 pcisf = (PC_IS*)pcf->data; 6229 pcisf->is_B_local = pcis->is_B_local; 6230 pcisf->vec1_N = pcis->vec1_N; 6231 pcisf->BtoNmap = pcis->BtoNmap; 6232 pcisf->n = pcis->n; 6233 pcisf->n_B = pcis->n_B; 6234 pcbddcf = (PC_BDDC*)pcf->data; 6235 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6236 pcbddcf->mat_graph = pcbddc->mat_graph; 6237 pcbddcf->use_faces = PETSC_TRUE; 6238 pcbddcf->use_change_of_basis = PETSC_TRUE; 6239 pcbddcf->use_change_on_faces = PETSC_TRUE; 6240 pcbddcf->use_qr_single = PETSC_TRUE; 6241 pcbddcf->fake_change = PETSC_TRUE; 6242 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6243 /* store information on primal vertices and change of basis (in local numbering) */ 6244 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6245 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6246 change = pcbddcf->ConstraintMatrix; 6247 pcbddcf->ConstraintMatrix = NULL; 6248 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6249 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6250 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6251 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6252 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6253 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6254 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6255 pcf->ops->destroy = NULL; 6256 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6257 } 6258 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6259 ierr = MatDestroy(&change);CHKERRQ(ierr); 6260 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6261 } 6262 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6263 6264 /* free adjacency */ 6265 if (free_used_adj) { 6266 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6267 } 6268 PetscFunctionReturn(0); 6269 } 6270 6271 #undef __FUNCT__ 6272 #define __FUNCT__ "PCBDDCInitSubSchurs" 6273 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6274 { 6275 PC_IS *pcis=(PC_IS*)pc->data; 6276 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6277 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6278 PCBDDCGraph graph; 6279 PetscErrorCode ierr; 6280 6281 PetscFunctionBegin; 6282 /* attach interface graph for determining subsets */ 6283 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6284 IS verticesIS,verticescomm; 6285 PetscInt vsize,*idxs; 6286 6287 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6288 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6289 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6290 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6291 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6292 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6293 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6294 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6295 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6296 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6297 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6298 /* 6299 if (pcbddc->dbg_flag) { 6300 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6301 } 6302 */ 6303 } else { 6304 graph = pcbddc->mat_graph; 6305 } 6306 6307 /* sub_schurs init */ 6308 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6309 6310 /* free graph struct */ 6311 if (pcbddc->sub_schurs_rebuild) { 6312 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6313 } 6314 PetscFunctionReturn(0); 6315 } 6316 6317 #undef __FUNCT__ 6318 #define __FUNCT__ "PCBDDCCheckOperator" 6319 PetscErrorCode PCBDDCCheckOperator(PC pc) 6320 { 6321 PC_IS *pcis=(PC_IS*)pc->data; 6322 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6323 PetscErrorCode ierr; 6324 6325 PetscFunctionBegin; 6326 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6327 IS zerodiag = NULL; 6328 Mat S_j,B0_B=NULL; 6329 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6330 PetscScalar *p0_check,*array,*array2; 6331 PetscReal norm; 6332 PetscInt i; 6333 6334 /* B0 and B0_B */ 6335 if (zerodiag) { 6336 IS dummy; 6337 6338 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6339 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6340 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6341 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6342 } 6343 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6344 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6345 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6346 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6347 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6348 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6349 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6350 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6351 /* S_j */ 6352 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6353 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6354 6355 /* mimic vector in \widetilde{W}_\Gamma */ 6356 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6357 /* continuous in primal space */ 6358 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6359 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6360 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6361 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6362 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6363 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6364 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6365 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6366 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6367 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6368 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6369 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6370 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6371 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6372 6373 /* assemble rhs for coarse problem */ 6374 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6375 /* local with Schur */ 6376 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6377 if (zerodiag) { 6378 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6379 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6380 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6381 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6382 } 6383 /* sum on primal nodes the local contributions */ 6384 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6385 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6386 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6387 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6388 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6389 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6390 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6391 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6392 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6393 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6394 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6395 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6396 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6397 /* scale primal nodes (BDDC sums contibutions) */ 6398 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6399 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6400 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6401 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6402 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6403 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6404 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6405 /* global: \widetilde{B0}_B w_\Gamma */ 6406 if (zerodiag) { 6407 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6408 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6409 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6410 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6411 } 6412 /* BDDC */ 6413 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6414 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6415 6416 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6417 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6418 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6419 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6420 for (i=0;i<pcbddc->benign_n;i++) { 6421 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6422 } 6423 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6424 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6425 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6426 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6427 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6428 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6429 } 6430 PetscFunctionReturn(0); 6431 } 6432