1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1001 } 1002 1003 if (mss) { /* multilevel */ 1004 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1006 } 1007 1008 thresh = pcbddc->adaptive_threshold; 1009 for (i=0;i<sub_schurs->n_subs;i++) { 1010 const PetscInt *idxs; 1011 PetscReal infty = PETSC_MAX_REAL; 1012 PetscInt j,subset_size,eigs_start = 0; 1013 PetscBLASInt B_N; 1014 PetscBool same_data = PETSC_FALSE; 1015 1016 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1017 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1018 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1019 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1020 if (sub_schurs->is_hermitian) { 1021 PetscInt j,k; 1022 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1023 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1024 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 } 1026 for (j=0;j<subset_size;j++) { 1027 for (k=j;k<subset_size;k++) { 1028 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1029 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1030 } 1031 } 1032 } else { 1033 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1034 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 } 1036 } else { 1037 S = Sarray + cumarray; 1038 St = Starray + cumarray; 1039 } 1040 /* see if we can save some work */ 1041 if (sub_schurs->n_subs == 1) { 1042 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1043 } 1044 1045 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1046 B_neigs = 0; 1047 } else { 1048 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1049 PetscBLASInt B_itype = 1; 1050 PetscBLASInt B_IL, B_IU; 1051 PetscReal eps = -1.0; /* dlamch? */ 1052 PetscInt nmin_s; 1053 1054 if (pcbddc->dbg_flag) { 1055 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1056 } 1057 1058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1059 if (thresh > 1.+PETSC_SMALL) { 1060 1061 /* ask for eigenvalues larger than thresh */ 1062 #if defined(PETSC_USE_COMPLEX) 1063 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1064 #else 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1066 #endif 1067 } else { 1068 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1069 B_IL = 1; 1070 #if defined(PETSC_USE_COMPLEX) 1071 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1072 #else 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1074 #endif 1075 } 1076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1077 if (B_ierr) { 1078 if (B_ierr < 0 ) { 1079 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1080 } else if (B_ierr <= B_N) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1082 } else { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1084 } 1085 } 1086 1087 if (B_neigs > nmax) { 1088 if (pcbddc->dbg_flag) { 1089 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1090 } 1091 eigs_start = B_neigs -nmax; 1092 B_neigs = nmax; 1093 } 1094 1095 nmin_s = PetscMin(nmin,B_N); 1096 if (B_neigs < nmin_s) { 1097 PetscBLASInt B_neigs2; 1098 1099 B_IU = B_N - B_neigs; 1100 B_IL = B_N - nmin_s + 1; 1101 if (pcbddc->dbg_flag) { 1102 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1103 } 1104 if (sub_schurs->is_hermitian) { 1105 PetscInt j; 1106 for (j=0;j<subset_size;j++) { 1107 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1108 } 1109 for (j=0;j<subset_size;j++) { 1110 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1114 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1115 } 1116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1117 #if defined(PETSC_USE_COMPLEX) 1118 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1119 #else 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1121 #endif 1122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1123 B_neigs += B_neigs2; 1124 } 1125 if (B_ierr) { 1126 if (B_ierr < 0 ) { 1127 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1128 } else if (B_ierr <= B_N) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1130 } else { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1132 } 1133 } 1134 if (pcbddc->dbg_flag) { 1135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1136 for (j=0;j<B_neigs;j++) { 1137 if (eigs[j] == 0.0) { 1138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1141 } 1142 } 1143 } 1144 } else { 1145 /* TODO */ 1146 } 1147 } 1148 /* change the basis back to the original one */ 1149 if (sub_schurs->change) { 1150 Mat change,change_subs,phi,phit; 1151 IS issubs; 1152 1153 if (pcbddc->dbg_flag > 1) { 1154 PetscInt ii; 1155 for (ii=0;ii<B_neigs;ii++) { 1156 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1157 for (j=0;j<B_N;j++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1159 } 1160 } 1161 } 1162 ierr = ISCreateStride(PETSC_COMM_SELF,subset_size,cum2,1,&issubs);CHKERRQ(ierr); 1163 ierr = KSPGetOperators(sub_schurs->change,&change,NULL);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrix(change,issubs,issubs,MAT_INITIAL_MATRIX,&change_subs);CHKERRQ(ierr); 1165 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1166 ierr = MatMatMult(change_subs,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1167 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1168 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1169 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1170 ierr = MatDestroy(&change_subs);CHKERRQ(ierr); 1171 ierr = ISDestroy(&issubs);CHKERRQ(ierr); 1172 } 1173 maxneigs = PetscMax(B_neigs,maxneigs); 1174 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1175 if (B_neigs) { 1176 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1177 1178 if (pcbddc->dbg_flag > 1) { 1179 PetscInt ii; 1180 for (ii=0;ii<B_neigs;ii++) { 1181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1182 for (j=0;j<B_N;j++) { 1183 #if defined(PETSC_USE_COMPLEX) 1184 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1185 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1187 #else 1188 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1189 #endif 1190 } 1191 } 1192 } 1193 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1194 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1195 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1196 cum++; 1197 } 1198 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1199 /* shift for next computation */ 1200 cumarray += subset_size*subset_size; 1201 cum2 += subset_size; 1202 } 1203 if (pcbddc->dbg_flag) { 1204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1205 } 1206 1207 if (mss) { 1208 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1209 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1210 /* destroy matrices (junk) */ 1211 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1212 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1213 } 1214 if (allocated_S_St) { 1215 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1216 } 1217 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1218 #if defined(PETSC_USE_COMPLEX) 1219 ierr = PetscFree(rwork);CHKERRQ(ierr); 1220 #endif 1221 if (pcbddc->dbg_flag) { 1222 PetscInt maxneigs_r; 1223 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "PCBDDCSetUpSolvers" 1231 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1232 { 1233 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1234 PetscScalar *coarse_submat_vals; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1239 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1240 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1241 1242 /* Setup local neumann solver ksp_R */ 1243 /* PCBDDCSetUpLocalScatters should be called first! */ 1244 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1245 1246 /* Change global null space passed in by the user if change of basis has been requested */ 1247 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1248 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1249 } 1250 1251 /* 1252 Setup local correction and local part of coarse basis. 1253 Gives back the dense local part of the coarse matrix in column major ordering 1254 */ 1255 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1256 1257 /* Compute total number of coarse nodes and setup coarse solver */ 1258 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1259 1260 /* free */ 1261 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCBDDCResetCustomization" 1267 PetscErrorCode PCBDDCResetCustomization(PC pc) 1268 { 1269 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1274 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1275 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1276 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1278 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1279 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1280 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1281 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1282 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1283 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1284 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "PCBDDCResetTopography" 1290 PetscErrorCode PCBDDCResetTopography(PC pc) 1291 { 1292 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1293 PetscInt i; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1298 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1299 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1300 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1301 for (i=0;i<pcbddc->n_local_subs;i++) { 1302 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1305 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "PCBDDCResetSolvers" 1311 PetscErrorCode PCBDDCResetSolvers(PC pc) 1312 { 1313 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1318 if (pcbddc->coarse_phi_B) { 1319 PetscScalar *array; 1320 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1321 ierr = PetscFree(array);CHKERRQ(ierr); 1322 } 1323 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1324 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1325 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1326 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1327 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1328 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1329 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1330 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1331 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1332 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1333 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1334 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1335 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1336 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1337 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1338 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1339 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1341 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1342 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1343 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1346 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1347 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1348 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1349 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1350 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1351 if (pcbddc->benign_zerodiag_subs) { 1352 PetscInt i; 1353 for (i=0;i<pcbddc->benign_n;i++) { 1354 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1355 } 1356 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1357 } 1358 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1364 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1365 { 1366 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1367 PC_IS *pcis = (PC_IS*)pc->data; 1368 VecType impVecType; 1369 PetscInt n_constraints,n_R,old_size; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 if (!pcbddc->ConstraintMatrix) { 1374 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1375 } 1376 /* get sizes */ 1377 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1378 n_R = pcis->n - pcbddc->n_vertices; 1379 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1380 /* local work vectors (try to avoid unneeded work)*/ 1381 /* R nodes */ 1382 old_size = -1; 1383 if (pcbddc->vec1_R) { 1384 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1385 } 1386 if (n_R != old_size) { 1387 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1388 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1389 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1390 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1391 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1392 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1393 } 1394 /* local primal dofs */ 1395 old_size = -1; 1396 if (pcbddc->vec1_P) { 1397 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1398 } 1399 if (pcbddc->local_primal_size != old_size) { 1400 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1401 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1402 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1403 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1404 } 1405 /* local explicit constraints */ 1406 old_size = -1; 1407 if (pcbddc->vec1_C) { 1408 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1409 } 1410 if (n_constraints && n_constraints != old_size) { 1411 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1412 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1413 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1414 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "PCBDDCSetUpCorrection" 1421 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1422 { 1423 PetscErrorCode ierr; 1424 /* pointers to pcis and pcbddc */ 1425 PC_IS* pcis = (PC_IS*)pc->data; 1426 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1427 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1428 /* submatrices of local problem */ 1429 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1430 /* submatrices for benign trick */ 1431 Mat B0_R = NULL, B0_V = NULL; 1432 /* submatrices of local coarse problem */ 1433 Mat S_VV,S_CV,S_VC,S_CC; 1434 /* working matrices */ 1435 Mat C_CR; 1436 /* additional working stuff */ 1437 PC pc_R; 1438 Mat F; 1439 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1440 1441 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1442 PetscScalar *work; 1443 PetscInt *idx_V_B; 1444 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1445 PetscInt i,n_R,n_D,n_B; 1446 1447 /* some shortcuts to scalars */ 1448 PetscScalar one=1.0,m_one=-1.0; 1449 1450 PetscFunctionBegin; 1451 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1452 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1453 } 1454 1455 /* Set Non-overlapping dimensions */ 1456 n_vertices = pcbddc->n_vertices; 1457 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1458 n_B = pcis->n_B; 1459 n_D = pcis->n - n_B; 1460 n_R = pcis->n - n_vertices; 1461 1462 /* vertices in boundary numbering */ 1463 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1464 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1465 if (i != n_vertices) { 1466 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1467 } 1468 1469 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1470 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1471 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1472 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1474 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1475 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1476 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1477 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1478 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1479 1480 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1481 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1482 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1483 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1484 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1485 lda_rhs = n_R; 1486 need_benign_correction = PETSC_FALSE; 1487 if (isLU || isILU || isCHOL) { 1488 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1489 } else if (sub_schurs->reuse_solver) { 1490 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1491 MatFactorType type; 1492 1493 F = reuse_solver->F; 1494 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1495 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1496 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1497 need_benign_correction = !!reuse_solver->benign_n; 1498 } else { 1499 F = NULL; 1500 } 1501 1502 /* allocate workspace */ 1503 n = 0; 1504 if (n_constraints) { 1505 n += lda_rhs*n_constraints; 1506 } 1507 if (n_vertices) { 1508 n = PetscMax(2*lda_rhs*n_vertices,n); 1509 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1510 } 1511 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1512 1513 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1514 if (n_constraints) { 1515 Mat M1,M2,M3; 1516 Mat auxmat; 1517 IS is_aux; 1518 PetscScalar *array,*array2; 1519 1520 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1521 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1522 1523 /* Extract constraints on R nodes: C_{CR} */ 1524 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1525 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1526 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1527 1528 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1529 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1530 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1531 for (i=0;i<n_constraints;i++) { 1532 const PetscScalar *row_cmat_values; 1533 const PetscInt *row_cmat_indices; 1534 PetscInt size_of_constraint,j; 1535 1536 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1537 for (j=0;j<size_of_constraint;j++) { 1538 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1539 } 1540 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1541 } 1542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1543 if (F) { 1544 Mat B; 1545 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1547 if (need_benign_correction) { 1548 PetscScalar *marr; 1549 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1550 1551 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1552 for (i=0;i<n_constraints;i++) { 1553 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1554 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1556 } 1557 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1558 } 1559 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1560 if (need_benign_correction) { 1561 PetscScalar *marr; 1562 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1563 1564 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1565 for (i=0;i<n_constraints;i++) { 1566 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1567 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1568 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1569 } 1570 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1571 } 1572 ierr = MatDestroy(&B);CHKERRQ(ierr); 1573 } else { 1574 PetscScalar *marr; 1575 1576 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1577 for (i=0;i<n_constraints;i++) { 1578 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1579 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1580 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1581 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1582 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1583 } 1584 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1585 } 1586 if (!pcbddc->switch_static) { 1587 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1588 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1589 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 for (i=0;i<n_constraints;i++) { 1591 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1592 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1596 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1597 } 1598 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1599 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1600 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1601 } else { 1602 if (lda_rhs != n_R) { 1603 IS dummy; 1604 1605 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1607 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1608 } else { 1609 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1610 pcbddc->local_auxmat2 = local_auxmat2_R; 1611 } 1612 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1613 } 1614 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1615 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1616 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1617 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1618 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1619 if (isCHOL) { 1620 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1623 } 1624 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1625 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1626 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1627 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1628 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1629 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1630 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1631 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1632 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1633 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1634 } 1635 1636 /* Extract B0_R */ 1637 if (pcbddc->benign_n) { 1638 IS dummy; 1639 PetscReal norm; 1640 1641 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1642 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1643 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1644 if (norm < PETSC_SMALL) { 1645 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1646 } 1647 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1648 } 1649 1650 /* Get submatrices from subdomain matrix */ 1651 if (n_vertices) { 1652 IS is_aux; 1653 1654 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1655 IS tis; 1656 1657 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1658 ierr = ISSort(tis);CHKERRQ(ierr); 1659 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1660 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1661 } else { 1662 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1663 } 1664 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1665 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1666 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1667 if (pcbddc->benign_n) { 1668 IS dummy; 1669 1670 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1671 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1672 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1673 } 1674 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1675 } 1676 1677 /* Matrix of coarse basis functions (local) */ 1678 if (pcbddc->coarse_phi_B) { 1679 PetscInt on_B,on_primal,on_D=n_D; 1680 if (pcbddc->coarse_phi_D) { 1681 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1682 } 1683 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1684 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1685 PetscScalar *marray; 1686 1687 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1688 ierr = PetscFree(marray);CHKERRQ(ierr); 1689 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1690 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1691 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1692 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1693 } 1694 } 1695 1696 if (!pcbddc->coarse_phi_B) { 1697 PetscScalar *marray; 1698 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 n += n_D*pcbddc->local_primal_size; 1702 } 1703 if (!pcbddc->symmetric_primal) { 1704 n *= 2; 1705 } 1706 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1707 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1708 n = n_B*pcbddc->local_primal_size; 1709 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1710 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1711 n += n_D*pcbddc->local_primal_size; 1712 } 1713 if (!pcbddc->symmetric_primal) { 1714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1715 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1716 n = n_B*pcbddc->local_primal_size; 1717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1718 } 1719 } else { 1720 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1721 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1722 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1723 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1724 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1725 } 1726 } 1727 } 1728 1729 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1730 p0_lidx_I = NULL; 1731 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1732 const PetscInt *idxs; 1733 1734 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1735 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1736 for (i=0;i<pcbddc->benign_n;i++) { 1737 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1738 } 1739 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1740 } 1741 1742 /* vertices */ 1743 if (n_vertices) { 1744 1745 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1746 1747 if (n_R) { 1748 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1749 PetscBLASInt B_N,B_one = 1; 1750 PetscScalar *x,*y; 1751 PetscBool isseqaij; 1752 1753 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1754 if (need_benign_correction) { 1755 ISLocalToGlobalMapping RtoN; 1756 IS is_p0; 1757 PetscInt *idxs_p0,n; 1758 1759 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1760 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1761 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1762 if (n != pcbddc->benign_n) { 1763 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1764 } 1765 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1766 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1767 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1768 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1769 } 1770 1771 if (lda_rhs == n_R) { 1772 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 } else { 1774 PetscScalar *av,*array; 1775 const PetscInt *xadj,*adjncy; 1776 PetscInt n; 1777 PetscBool flg_row; 1778 1779 array = work+lda_rhs*n_vertices; 1780 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1781 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1782 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1784 for (i=0;i<n;i++) { 1785 PetscInt j; 1786 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1787 } 1788 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1789 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1790 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1791 } 1792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1793 if (F) { 1794 if (need_benign_correction) { 1795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1796 PetscScalar *marr; 1797 1798 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1799 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1800 1801 | 0 0 0 | (V) 1802 L = | 0 0 -1 | (P-p0) 1803 | 0 0 -1 | (p0) 1804 1805 */ 1806 for (i=0;i<reuse_solver->benign_n;i++) { 1807 const PetscScalar *vals; 1808 const PetscInt *idxs,*idxs_zero; 1809 PetscInt n,j,nz; 1810 1811 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1812 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1813 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1814 for (j=0;j<n;j++) { 1815 PetscScalar val = vals[j]; 1816 PetscInt k,col = idxs[j]; 1817 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1818 } 1819 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1820 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1821 } 1822 /* need to correct the rhs */ 1823 for (i=0;i<n_vertices;i++) { 1824 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1825 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1827 } 1828 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1829 } 1830 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1831 /* need to correct the solution */ 1832 if (need_benign_correction) { 1833 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1834 PetscScalar *marr; 1835 1836 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1840 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1841 } 1842 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1843 } 1844 } else { 1845 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1846 for (i=0;i<n_vertices;i++) { 1847 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1848 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1849 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1850 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1851 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1852 } 1853 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1854 } 1855 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1856 /* S_VV and S_CV */ 1857 if (n_constraints) { 1858 Mat B; 1859 1860 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1861 for (i=0;i<n_vertices;i++) { 1862 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1863 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1867 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1868 } 1869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1870 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1871 ierr = MatDestroy(&B);CHKERRQ(ierr); 1872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1873 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1874 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1876 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1877 ierr = MatDestroy(&B);CHKERRQ(ierr); 1878 } 1879 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1880 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1881 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1882 } 1883 if (lda_rhs != n_R) { 1884 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1885 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1886 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1887 } 1888 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1889 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1890 if (need_benign_correction) { 1891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1892 PetscScalar *marr,*sums; 1893 1894 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1895 ierr = MatDenseGetArray(S_VVt,&marr); 1896 for (i=0;i<reuse_solver->benign_n;i++) { 1897 const PetscScalar *vals; 1898 const PetscInt *idxs,*idxs_zero; 1899 PetscInt n,j,nz; 1900 1901 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1902 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1903 for (j=0;j<n_vertices;j++) { 1904 PetscInt k; 1905 sums[j] = 0.; 1906 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1907 } 1908 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1909 for (j=0;j<n;j++) { 1910 PetscScalar val = vals[j]; 1911 PetscInt k; 1912 for (k=0;k<n_vertices;k++) { 1913 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1914 } 1915 } 1916 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1917 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1918 } 1919 ierr = PetscFree(sums);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&marr); 1921 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1922 } 1923 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1924 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1925 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1926 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1927 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1928 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1929 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1930 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1931 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1932 } else { 1933 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1934 } 1935 for (i=0;i<pcbddc->benign_n;i++) { 1936 const PetscScalar *vals; 1937 const PetscInt *idxs; 1938 PetscInt n,j,primal_idx; 1939 1940 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1941 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1942 for (j=0;j<n;j++) { 1943 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1944 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1945 } 1946 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1947 } 1948 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1949 1950 /* coarse basis functions */ 1951 for (i=0;i<n_vertices;i++) { 1952 PetscScalar *y; 1953 1954 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1955 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1956 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1957 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1959 y[n_B*i+idx_V_B[i]] = 1.0; 1960 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1962 1963 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1964 PetscInt j; 1965 1966 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1967 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1968 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1969 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1971 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1972 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1973 } 1974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1975 } 1976 /* if n_R == 0 the object is not destroyed */ 1977 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1978 } 1979 1980 if (n_constraints) { 1981 Mat B; 1982 1983 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1984 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1985 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1986 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1987 if (n_vertices) { 1988 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1989 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1990 } else { 1991 Mat S_VCt; 1992 1993 if (lda_rhs != n_R) { 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1996 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1997 } 1998 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1999 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2000 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2001 } 2002 } 2003 ierr = MatDestroy(&B);CHKERRQ(ierr); 2004 /* coarse basis functions */ 2005 for (i=0;i<n_constraints;i++) { 2006 PetscScalar *y; 2007 2008 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2014 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2015 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2016 PetscInt j; 2017 2018 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2019 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2020 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2021 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2023 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2024 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2025 } 2026 if (B0_R) { 2027 Vec benign_proj; 2028 const PetscScalar *array; 2029 PetscInt j; 2030 2031 ierr = MatCreateVecs(B0_R,NULL,&benign_proj);CHKERRQ(ierr); 2032 ierr = MatMult(B0_R,pcbddc->vec1_R,benign_proj);CHKERRQ(ierr); 2033 ierr = VecGetArrayRead(benign_proj,&array);CHKERRQ(ierr); 2034 for (j=0;j<pcbddc->benign_n;j++) { 2035 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2036 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i+n_vertices] = array[j]; 2037 coarse_submat_vals[(i+n_vertices)*pcbddc->local_primal_size+primal_idx] = array[j]; 2038 } 2039 ierr = VecRestoreArrayRead(benign_proj,&array);CHKERRQ(ierr); 2040 ierr = VecDestroy(&benign_proj);CHKERRQ(ierr); 2041 } 2042 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2043 } 2044 } 2045 if (n_constraints) { 2046 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2047 } 2048 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2049 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2050 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 2051 2052 /* compute other basis functions for non-symmetric problems */ 2053 if (!pcbddc->symmetric_primal) { 2054 Mat B_V=NULL,B_C=NULL; 2055 PetscScalar *marray; 2056 2057 if (n_constraints) { 2058 Mat S_CCT,C_CRT; 2059 2060 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2061 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2062 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2063 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2064 if (n_vertices) { 2065 Mat S_VCT; 2066 2067 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2068 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2069 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2070 } 2071 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2072 } 2073 if (n_vertices && n_R) { 2074 PetscScalar *av,*marray; 2075 const PetscInt *xadj,*adjncy; 2076 PetscInt n; 2077 PetscBool flg_row; 2078 2079 /* B_V = B_V - A_VR^T */ 2080 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2081 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2082 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2083 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2084 for (i=0;i<n;i++) { 2085 PetscInt j; 2086 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2087 } 2088 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2089 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2090 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2091 } 2092 2093 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2094 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2095 for (i=0;i<n_vertices;i++) { 2096 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2097 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2098 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2100 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2101 } 2102 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2103 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2104 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2105 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2106 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2107 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2108 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2109 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2110 } 2111 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2112 /* coarse basis functions */ 2113 for (i=0;i<pcbddc->local_primal_size;i++) { 2114 PetscScalar *y; 2115 2116 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2117 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2118 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2119 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 if (i<n_vertices) { 2122 y[n_B*i+idx_V_B[i]] = 1.0; 2123 } 2124 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2125 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2126 2127 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2128 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2129 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2130 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2133 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2134 } 2135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2136 } 2137 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2138 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2139 } 2140 /* free memory */ 2141 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2142 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2143 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2144 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2145 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2146 ierr = PetscFree(work);CHKERRQ(ierr); 2147 if (n_vertices) { 2148 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2149 } 2150 if (n_constraints) { 2151 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2152 } 2153 /* Checking coarse_sub_mat and coarse basis functios */ 2154 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2155 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2156 if (pcbddc->dbg_flag) { 2157 Mat coarse_sub_mat; 2158 Mat AUXMAT,TM1,TM2,TM3,TM4; 2159 Mat coarse_phi_D,coarse_phi_B; 2160 Mat coarse_psi_D,coarse_psi_B; 2161 Mat A_II,A_BB,A_IB,A_BI; 2162 Mat C_B,CPHI; 2163 IS is_dummy; 2164 Vec mones; 2165 MatType checkmattype=MATSEQAIJ; 2166 PetscReal real_value; 2167 2168 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2169 Mat A; 2170 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2171 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2172 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2173 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2174 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2175 ierr = MatDestroy(&A);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2178 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2179 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2180 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2181 } 2182 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2183 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2184 if (!pcbddc->symmetric_primal) { 2185 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2186 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2187 } 2188 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2189 2190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2192 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2193 if (!pcbddc->symmetric_primal) { 2194 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2195 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2196 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2197 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2198 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2199 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2200 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2201 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2202 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2203 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2204 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2205 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2206 } else { 2207 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2208 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2209 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2210 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2211 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2212 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2213 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2214 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2215 } 2216 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2217 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2218 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2219 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2220 if (pcbddc->benign_n) { 2221 Mat B0_B,B0_BPHI; 2222 PetscScalar *data,*data2; 2223 PetscInt j; 2224 2225 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2226 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2227 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2228 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2229 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2230 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2231 for (j=0;j<pcbddc->benign_n;j++) { 2232 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2233 for (i=0;i<pcbddc->local_primal_size;i++) { 2234 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2235 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2236 } 2237 } 2238 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2239 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2240 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2241 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2242 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2243 } 2244 #if 0 2245 { 2246 PetscViewer viewer; 2247 char filename[256]; 2248 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2249 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2250 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2252 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2253 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2254 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2255 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2256 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2257 if (pcbddc->coarse_phi_D) { 2258 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2259 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2260 } 2261 if (pcbddc->coarse_psi_B) { 2262 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2263 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2264 } 2265 if (pcbddc->coarse_phi_B) { 2266 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2267 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2268 } 2269 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2270 } 2271 #endif 2272 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2273 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2274 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2276 2277 /* check constraints */ 2278 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2279 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2280 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2281 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2282 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2283 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2284 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2285 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2287 if (!pcbddc->symmetric_primal) { 2288 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2289 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2290 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2291 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2293 } 2294 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2295 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2296 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2297 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2298 } 2299 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2300 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2301 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2302 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2303 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2304 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2305 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2306 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2307 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2308 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2309 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2310 if (!pcbddc->symmetric_primal) { 2311 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2312 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2313 } 2314 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2315 } 2316 /* get back data */ 2317 *coarse_submat_vals_n = coarse_submat_vals; 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2323 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2324 { 2325 Mat *work_mat; 2326 IS isrow_s,iscol_s; 2327 PetscBool rsorted,csorted; 2328 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2333 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2334 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2335 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2336 2337 if (!rsorted) { 2338 const PetscInt *idxs; 2339 PetscInt *idxs_sorted,i; 2340 2341 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2342 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2343 for (i=0;i<rsize;i++) { 2344 idxs_perm_r[i] = i; 2345 } 2346 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2347 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2348 for (i=0;i<rsize;i++) { 2349 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2350 } 2351 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2352 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2353 } else { 2354 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2355 isrow_s = isrow; 2356 } 2357 2358 if (!csorted) { 2359 if (isrow == iscol) { 2360 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2361 iscol_s = isrow_s; 2362 } else { 2363 const PetscInt *idxs; 2364 PetscInt *idxs_sorted,i; 2365 2366 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2367 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2368 for (i=0;i<csize;i++) { 2369 idxs_perm_c[i] = i; 2370 } 2371 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2372 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2373 for (i=0;i<csize;i++) { 2374 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2375 } 2376 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2377 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2378 } 2379 } else { 2380 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2381 iscol_s = iscol; 2382 } 2383 2384 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2385 2386 if (!rsorted || !csorted) { 2387 Mat new_mat; 2388 IS is_perm_r,is_perm_c; 2389 2390 if (!rsorted) { 2391 PetscInt *idxs_r,i; 2392 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2393 for (i=0;i<rsize;i++) { 2394 idxs_r[idxs_perm_r[i]] = i; 2395 } 2396 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2397 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2398 } else { 2399 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2400 } 2401 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2402 2403 if (!csorted) { 2404 if (isrow_s == iscol_s) { 2405 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2406 is_perm_c = is_perm_r; 2407 } else { 2408 PetscInt *idxs_c,i; 2409 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2410 for (i=0;i<csize;i++) { 2411 idxs_c[idxs_perm_c[i]] = i; 2412 } 2413 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2414 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2415 } 2416 } else { 2417 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2418 } 2419 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2420 2421 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2422 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2423 work_mat[0] = new_mat; 2424 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2425 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2426 } 2427 2428 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2429 *B = work_mat[0]; 2430 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2431 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2432 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2438 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2439 { 2440 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2441 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2442 Mat new_mat; 2443 IS is_local,is_global; 2444 PetscInt local_size; 2445 PetscBool isseqaij; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2450 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2451 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2452 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2453 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2454 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2455 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2456 2457 /* check */ 2458 if (pcbddc->dbg_flag) { 2459 Vec x,x_change; 2460 PetscReal error; 2461 2462 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2463 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2464 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2465 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2466 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2467 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2468 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2469 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2470 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2471 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2472 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2474 ierr = VecDestroy(&x);CHKERRQ(ierr); 2475 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2476 } 2477 2478 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2479 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2480 if (isseqaij) { 2481 Mat M; 2482 2483 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2484 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2485 ierr = MatDestroy(&M);CHKERRQ(ierr); 2486 } else { 2487 Mat work_mat,M; 2488 2489 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2490 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2491 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2492 ierr = MatDestroy(&M);CHKERRQ(ierr); 2493 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2494 } 2495 if (matis->A->symmetric_set) { 2496 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2497 #if !defined(PETSC_USE_COMPLEX) 2498 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2499 #endif 2500 } 2501 /* 2502 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2503 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2504 */ 2505 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2511 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2512 { 2513 PC_IS* pcis = (PC_IS*)(pc->data); 2514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2515 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2516 PetscInt *idx_R_local=NULL; 2517 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2518 PetscInt vbs,bs; 2519 PetscBT bitmask=NULL; 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 /* 2524 No need to setup local scatters if 2525 - primal space is unchanged 2526 AND 2527 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2528 AND 2529 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2530 */ 2531 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2532 PetscFunctionReturn(0); 2533 } 2534 /* destroy old objects */ 2535 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2536 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2537 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2538 /* Set Non-overlapping dimensions */ 2539 n_B = pcis->n_B; 2540 n_D = pcis->n - n_B; 2541 n_vertices = pcbddc->n_vertices; 2542 2543 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2544 2545 /* create auxiliary bitmask and allocate workspace */ 2546 if (!sub_schurs->reuse_solver) { 2547 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2548 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2549 for (i=0;i<n_vertices;i++) { 2550 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2551 } 2552 2553 for (i=0, n_R=0; i<pcis->n; i++) { 2554 if (!PetscBTLookup(bitmask,i)) { 2555 idx_R_local[n_R++] = i; 2556 } 2557 } 2558 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2559 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2560 2561 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2562 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2563 } 2564 2565 /* Block code */ 2566 vbs = 1; 2567 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2568 if (bs>1 && !(n_vertices%bs)) { 2569 PetscBool is_blocked = PETSC_TRUE; 2570 PetscInt *vary; 2571 if (!sub_schurs->reuse_solver) { 2572 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2573 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2574 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2575 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2576 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2577 for (i=0; i<pcis->n/bs; i++) { 2578 if (vary[i]!=0 && vary[i]!=bs) { 2579 is_blocked = PETSC_FALSE; 2580 break; 2581 } 2582 } 2583 ierr = PetscFree(vary);CHKERRQ(ierr); 2584 } else { 2585 /* Verify directly the R set */ 2586 for (i=0; i<n_R/bs; i++) { 2587 PetscInt j,node=idx_R_local[bs*i]; 2588 for (j=1; j<bs; j++) { 2589 if (node != idx_R_local[bs*i+j]-j) { 2590 is_blocked = PETSC_FALSE; 2591 break; 2592 } 2593 } 2594 } 2595 } 2596 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2597 vbs = bs; 2598 for (i=0;i<n_R/vbs;i++) { 2599 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2600 } 2601 } 2602 } 2603 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2604 if (sub_schurs->reuse_solver) { 2605 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2606 2607 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2608 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2609 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2610 reuse_solver->is_R = pcbddc->is_R_local; 2611 } else { 2612 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2613 } 2614 2615 /* print some info if requested */ 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2618 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2619 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2623 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2624 } 2625 2626 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2627 if (!sub_schurs->reuse_solver) { 2628 IS is_aux1,is_aux2; 2629 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2630 2631 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2632 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2633 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2634 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2635 for (i=0; i<n_D; i++) { 2636 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2637 } 2638 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2639 for (i=0, j=0; i<n_R; i++) { 2640 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2641 aux_array1[j++] = i; 2642 } 2643 } 2644 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2645 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2646 for (i=0, j=0; i<n_B; i++) { 2647 if (!PetscBTLookup(bitmask,is_indices[i])) { 2648 aux_array2[j++] = i; 2649 } 2650 } 2651 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2652 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2653 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2654 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2655 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2656 2657 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2658 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2659 for (i=0, j=0; i<n_R; i++) { 2660 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2661 aux_array1[j++] = i; 2662 } 2663 } 2664 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2665 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2666 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2667 } 2668 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2669 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2670 } else { 2671 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2672 IS tis; 2673 PetscInt schur_size; 2674 2675 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2676 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2677 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2678 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2679 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2680 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2681 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2682 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2683 } 2684 } 2685 PetscFunctionReturn(0); 2686 } 2687 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2691 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2692 { 2693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2694 PC_IS *pcis = (PC_IS*)pc->data; 2695 PC pc_temp; 2696 Mat A_RR; 2697 MatReuse reuse; 2698 PetscScalar m_one = -1.0; 2699 PetscReal value; 2700 PetscInt n_D,n_R; 2701 PetscBool use_exact,use_exact_reduced,issbaij; 2702 PetscErrorCode ierr; 2703 /* prefixes stuff */ 2704 char dir_prefix[256],neu_prefix[256],str_level[16]; 2705 size_t len; 2706 2707 PetscFunctionBegin; 2708 2709 /* compute prefixes */ 2710 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2711 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2712 if (!pcbddc->current_level) { 2713 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2714 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2715 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2716 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2717 } else { 2718 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2719 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2720 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2721 len -= 15; /* remove "pc_bddc_coarse_" */ 2722 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2723 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2724 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2725 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2726 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2727 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2728 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2729 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2730 } 2731 2732 /* DIRICHLET PROBLEM */ 2733 if (dirichlet) { 2734 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2735 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2736 if (!sub_schurs->reuse_solver) { 2737 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2738 } 2739 if (pcbddc->dbg_flag) { 2740 Mat A_IIn; 2741 2742 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2743 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2744 pcis->A_II = A_IIn; 2745 } 2746 } 2747 if (pcbddc->local_mat->symmetric_set) { 2748 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2749 } 2750 /* Matrix for Dirichlet problem is pcis->A_II */ 2751 n_D = pcis->n - pcis->n_B; 2752 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2753 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2754 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2755 /* default */ 2756 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2757 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2758 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2759 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2760 if (issbaij) { 2761 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2762 } else { 2763 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2764 } 2765 /* Allow user's customization */ 2766 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2767 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2768 } 2769 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2770 if (sub_schurs->reuse_solver) { 2771 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2772 2773 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2774 } 2775 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2776 if (!n_D) { 2777 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2778 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2779 } 2780 /* Set Up KSP for Dirichlet problem of BDDC */ 2781 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2782 /* set ksp_D into pcis data */ 2783 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2784 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2785 pcis->ksp_D = pcbddc->ksp_D; 2786 } 2787 2788 /* NEUMANN PROBLEM */ 2789 A_RR = 0; 2790 if (neumann) { 2791 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2792 PetscInt ibs,mbs; 2793 PetscBool issbaij; 2794 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2795 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2796 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2797 if (pcbddc->ksp_R) { /* already created ksp */ 2798 PetscInt nn_R; 2799 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2800 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2801 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2802 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2803 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2804 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2805 reuse = MAT_INITIAL_MATRIX; 2806 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2807 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2808 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2809 reuse = MAT_INITIAL_MATRIX; 2810 } else { /* safe to reuse the matrix */ 2811 reuse = MAT_REUSE_MATRIX; 2812 } 2813 } 2814 /* last check */ 2815 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2816 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2817 reuse = MAT_INITIAL_MATRIX; 2818 } 2819 } else { /* first time, so we need to create the matrix */ 2820 reuse = MAT_INITIAL_MATRIX; 2821 } 2822 /* extract A_RR */ 2823 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2824 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2825 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2826 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2827 if (matis->A == pcbddc->local_mat) { 2828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2829 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2830 } else { 2831 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2832 } 2833 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2834 if (matis->A == pcbddc->local_mat) { 2835 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2836 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2837 } else { 2838 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2839 } 2840 } 2841 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2842 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2843 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2844 } else { 2845 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2846 } 2847 if (pcbddc->local_mat->symmetric_set) { 2848 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2849 } 2850 if (!pcbddc->ksp_R) { /* create object if not present */ 2851 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2852 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2853 /* default */ 2854 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2855 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2856 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2857 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2858 if (issbaij) { 2859 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2860 } else { 2861 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2862 } 2863 /* Allow user's customization */ 2864 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2865 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2866 } 2867 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2868 if (!n_R) { 2869 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2870 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2871 } 2872 /* Reuse solver if it is present */ 2873 if (sub_schurs->reuse_solver) { 2874 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2875 2876 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2877 } 2878 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2879 /* Set Up KSP for Neumann problem of BDDC */ 2880 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2881 } 2882 /* free Neumann problem's matrix */ 2883 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2884 2885 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2886 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2887 if (pcbddc->dbg_flag) { 2888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2889 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2891 } 2892 if (dirichlet) { /* Dirichlet */ 2893 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2894 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2895 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2896 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2897 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2898 /* need to be adapted? */ 2899 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2900 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2901 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2902 /* print info */ 2903 if (pcbddc->dbg_flag) { 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2906 } 2907 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2908 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2909 } 2910 } 2911 if (neumann) { /* Neumann */ 2912 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2913 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2914 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2915 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2916 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2917 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2918 /* need to be adapted? */ 2919 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2920 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2921 /* print info */ 2922 if (pcbddc->dbg_flag) { 2923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2925 } 2926 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2927 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2928 } 2929 } 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2936 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2937 { 2938 PetscErrorCode ierr; 2939 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2940 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2941 2942 PetscFunctionBegin; 2943 if (!sub_schurs->reuse_solver) { 2944 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2945 } 2946 if (!pcbddc->switch_static) { 2947 if (applytranspose && pcbddc->local_auxmat1) { 2948 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2949 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2950 } 2951 if (!sub_schurs->reuse_solver) { 2952 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2953 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 } else { 2955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2956 2957 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2958 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2959 } 2960 } else { 2961 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2963 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2964 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2965 if (applytranspose && pcbddc->local_auxmat1) { 2966 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2967 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2968 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2969 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2970 } 2971 } 2972 if (!sub_schurs->reuse_solver) { 2973 if (applytranspose) { 2974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2975 } else { 2976 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2977 } 2978 } else { 2979 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2980 2981 if (applytranspose) { 2982 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2983 } else { 2984 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2985 } 2986 } 2987 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2988 if (!pcbddc->switch_static) { 2989 if (!sub_schurs->reuse_solver) { 2990 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2991 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 } else { 2993 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2994 2995 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2996 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2997 } 2998 if (!applytranspose && pcbddc->local_auxmat1) { 2999 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3000 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3001 } 3002 } else { 3003 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 if (!applytranspose && pcbddc->local_auxmat1) { 3008 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3009 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3010 } 3011 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3012 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3020 #undef __FUNCT__ 3021 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3022 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3023 { 3024 PetscErrorCode ierr; 3025 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3026 PC_IS* pcis = (PC_IS*) (pc->data); 3027 const PetscScalar zero = 0.0; 3028 3029 PetscFunctionBegin; 3030 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3031 if (!pcbddc->benign_apply_coarse_only) { 3032 if (applytranspose) { 3033 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3034 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3035 } else { 3036 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3037 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3038 } 3039 } else { 3040 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3041 } 3042 3043 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3044 if (pcbddc->benign_n) { 3045 PetscScalar *array; 3046 PetscInt j; 3047 3048 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3049 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3050 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3051 } 3052 3053 /* start communications from local primal nodes to rhs of coarse solver */ 3054 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3055 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3057 3058 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3059 /* TODO remove null space when doing multilevel */ 3060 if (pcbddc->coarse_ksp) { 3061 Mat coarse_mat; 3062 Vec rhs,sol; 3063 MatNullSpace nullsp; 3064 3065 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3066 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3067 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3068 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3069 if (nullsp) { 3070 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3071 } 3072 if (applytranspose) { 3073 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3074 } else { 3075 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3076 } 3077 if (nullsp) { 3078 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3079 } 3080 } 3081 3082 /* Local solution on R nodes */ 3083 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3084 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3085 } 3086 /* communications from coarse sol to local primal nodes */ 3087 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3088 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3089 3090 /* Sum contributions from the two levels */ 3091 if (!pcbddc->benign_apply_coarse_only) { 3092 if (applytranspose) { 3093 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3094 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3095 } else { 3096 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3097 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3098 } 3099 /* store p0 */ 3100 if (pcbddc->benign_n) { 3101 PetscScalar *array; 3102 PetscInt j; 3103 3104 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3105 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3106 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3107 } 3108 } else { /* expand the coarse solution */ 3109 if (applytranspose) { 3110 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3111 } else { 3112 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3113 } 3114 } 3115 PetscFunctionReturn(0); 3116 } 3117 3118 #undef __FUNCT__ 3119 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3120 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3121 { 3122 PetscErrorCode ierr; 3123 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3124 PetscScalar *array; 3125 Vec from,to; 3126 3127 PetscFunctionBegin; 3128 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3129 from = pcbddc->coarse_vec; 3130 to = pcbddc->vec1_P; 3131 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3132 Vec tvec; 3133 3134 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3135 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3136 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3137 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3138 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3139 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3140 } 3141 } else { /* from local to global -> put data in coarse right hand side */ 3142 from = pcbddc->vec1_P; 3143 to = pcbddc->coarse_vec; 3144 } 3145 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3151 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3152 { 3153 PetscErrorCode ierr; 3154 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3155 PetscScalar *array; 3156 Vec from,to; 3157 3158 PetscFunctionBegin; 3159 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3160 from = pcbddc->coarse_vec; 3161 to = pcbddc->vec1_P; 3162 } else { /* from local to global -> put data in coarse right hand side */ 3163 from = pcbddc->vec1_P; 3164 to = pcbddc->coarse_vec; 3165 } 3166 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3167 if (smode == SCATTER_FORWARD) { 3168 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3169 Vec tvec; 3170 3171 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3172 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3173 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3174 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3175 } 3176 } else { 3177 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3178 ierr = VecResetArray(from);CHKERRQ(ierr); 3179 } 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 3184 /* uncomment for testing purposes */ 3185 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3188 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3189 { 3190 PetscErrorCode ierr; 3191 PC_IS* pcis = (PC_IS*)(pc->data); 3192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3193 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3194 /* one and zero */ 3195 PetscScalar one=1.0,zero=0.0; 3196 /* space to store constraints and their local indices */ 3197 PetscScalar *constraints_data; 3198 PetscInt *constraints_idxs,*constraints_idxs_B; 3199 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3200 PetscInt *constraints_n; 3201 /* iterators */ 3202 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3203 /* BLAS integers */ 3204 PetscBLASInt lwork,lierr; 3205 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3206 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3207 /* reuse */ 3208 PetscInt olocal_primal_size,olocal_primal_size_cc; 3209 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3210 /* change of basis */ 3211 PetscBool qr_needed; 3212 PetscBT change_basis,qr_needed_idx; 3213 /* auxiliary stuff */ 3214 PetscInt *nnz,*is_indices; 3215 PetscInt ncc; 3216 /* some quantities */ 3217 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3218 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3219 3220 PetscFunctionBegin; 3221 /* Destroy Mat objects computed previously */ 3222 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3223 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3224 /* save info on constraints from previous setup (if any) */ 3225 olocal_primal_size = pcbddc->local_primal_size; 3226 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3227 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3228 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3229 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3230 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3231 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3232 3233 /* print some info */ 3234 if (pcbddc->dbg_flag) { 3235 IS vertices; 3236 PetscInt nv,nedges,nfaces; 3237 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3238 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3239 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3240 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3246 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3247 } 3248 3249 if (!pcbddc->adaptive_selection) { 3250 IS ISForVertices,*ISForFaces,*ISForEdges; 3251 MatNullSpace nearnullsp; 3252 const Vec *nearnullvecs; 3253 Vec *localnearnullsp; 3254 PetscScalar *array; 3255 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3256 PetscBool nnsp_has_cnst; 3257 /* LAPACK working arrays for SVD or POD */ 3258 PetscBool skip_lapack,boolforchange; 3259 PetscScalar *work; 3260 PetscReal *singular_vals; 3261 #if defined(PETSC_USE_COMPLEX) 3262 PetscReal *rwork; 3263 #endif 3264 #if defined(PETSC_MISSING_LAPACK_GESVD) 3265 PetscScalar *temp_basis,*correlation_mat; 3266 #else 3267 PetscBLASInt dummy_int=1; 3268 PetscScalar dummy_scalar=1.; 3269 #endif 3270 3271 /* Get index sets for faces, edges and vertices from graph */ 3272 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3273 /* free unneeded index sets */ 3274 if (!pcbddc->use_vertices) { 3275 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3276 } 3277 if (!pcbddc->use_edges) { 3278 for (i=0;i<n_ISForEdges;i++) { 3279 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3280 } 3281 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3282 n_ISForEdges = 0; 3283 } 3284 if (!pcbddc->use_faces) { 3285 for (i=0;i<n_ISForFaces;i++) { 3286 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3287 } 3288 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3289 n_ISForFaces = 0; 3290 } 3291 3292 #if defined(PETSC_USE_DEBUG) 3293 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3294 Also use_change_of_basis should be consistent among processors */ 3295 if (pcbddc->NullSpace) { 3296 PetscBool tbool[2],gbool[2]; 3297 3298 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3299 pcbddc->use_change_of_basis = PETSC_TRUE; 3300 if (!ISForEdges) { 3301 pcbddc->use_change_on_faces = PETSC_TRUE; 3302 } 3303 } 3304 tbool[0] = pcbddc->use_change_of_basis; 3305 tbool[1] = pcbddc->use_change_on_faces; 3306 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3307 pcbddc->use_change_of_basis = gbool[0]; 3308 pcbddc->use_change_on_faces = gbool[1]; 3309 } 3310 #endif 3311 3312 /* check if near null space is attached to global mat */ 3313 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3314 if (nearnullsp) { 3315 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3316 /* remove any stored info */ 3317 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3318 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3319 /* store information for BDDC solver reuse */ 3320 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3321 pcbddc->onearnullspace = nearnullsp; 3322 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3323 for (i=0;i<nnsp_size;i++) { 3324 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3325 } 3326 } else { /* if near null space is not provided BDDC uses constants by default */ 3327 nnsp_size = 0; 3328 nnsp_has_cnst = PETSC_TRUE; 3329 } 3330 /* get max number of constraints on a single cc */ 3331 max_constraints = nnsp_size; 3332 if (nnsp_has_cnst) max_constraints++; 3333 3334 /* 3335 Evaluate maximum storage size needed by the procedure 3336 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3337 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3338 There can be multiple constraints per connected component 3339 */ 3340 n_vertices = 0; 3341 if (ISForVertices) { 3342 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3343 } 3344 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3345 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3346 3347 total_counts = n_ISForFaces+n_ISForEdges; 3348 total_counts *= max_constraints; 3349 total_counts += n_vertices; 3350 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3351 3352 total_counts = 0; 3353 max_size_of_constraint = 0; 3354 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3355 IS used_is; 3356 if (i<n_ISForEdges) { 3357 used_is = ISForEdges[i]; 3358 } else { 3359 used_is = ISForFaces[i-n_ISForEdges]; 3360 } 3361 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3362 total_counts += j; 3363 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3364 } 3365 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3366 3367 /* get local part of global near null space vectors */ 3368 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3369 for (k=0;k<nnsp_size;k++) { 3370 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3371 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3372 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 } 3374 3375 /* whether or not to skip lapack calls */ 3376 skip_lapack = PETSC_TRUE; 3377 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3378 3379 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3380 if (!skip_lapack) { 3381 PetscScalar temp_work; 3382 3383 #if defined(PETSC_MISSING_LAPACK_GESVD) 3384 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3385 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3386 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3387 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3388 #if defined(PETSC_USE_COMPLEX) 3389 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3390 #endif 3391 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3392 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3393 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3394 lwork = -1; 3395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3396 #if !defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3400 #endif 3401 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3402 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3403 #else /* on missing GESVD */ 3404 /* SVD */ 3405 PetscInt max_n,min_n; 3406 max_n = max_size_of_constraint; 3407 min_n = max_constraints; 3408 if (max_size_of_constraint < max_constraints) { 3409 min_n = max_size_of_constraint; 3410 max_n = max_constraints; 3411 } 3412 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3413 #if defined(PETSC_USE_COMPLEX) 3414 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3415 #endif 3416 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3417 lwork = -1; 3418 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3419 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3420 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3421 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3422 #if !defined(PETSC_USE_COMPLEX) 3423 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3424 #else 3425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3426 #endif 3427 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3428 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3429 #endif /* on missing GESVD */ 3430 /* Allocate optimal workspace */ 3431 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3432 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3433 } 3434 /* Now we can loop on constraining sets */ 3435 total_counts = 0; 3436 constraints_idxs_ptr[0] = 0; 3437 constraints_data_ptr[0] = 0; 3438 /* vertices */ 3439 if (n_vertices) { 3440 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3441 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3442 for (i=0;i<n_vertices;i++) { 3443 constraints_n[total_counts] = 1; 3444 constraints_data[total_counts] = 1.0; 3445 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3446 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3447 total_counts++; 3448 } 3449 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3450 n_vertices = total_counts; 3451 } 3452 3453 /* edges and faces */ 3454 total_counts_cc = total_counts; 3455 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3456 IS used_is; 3457 PetscBool idxs_copied = PETSC_FALSE; 3458 3459 if (ncc<n_ISForEdges) { 3460 used_is = ISForEdges[ncc]; 3461 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3462 } else { 3463 used_is = ISForFaces[ncc-n_ISForEdges]; 3464 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3465 } 3466 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3467 3468 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3469 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3470 /* change of basis should not be performed on local periodic nodes */ 3471 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3472 if (nnsp_has_cnst) { 3473 PetscScalar quad_value; 3474 3475 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3476 idxs_copied = PETSC_TRUE; 3477 3478 if (!pcbddc->use_nnsp_true) { 3479 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3480 } else { 3481 quad_value = 1.0; 3482 } 3483 for (j=0;j<size_of_constraint;j++) { 3484 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3485 } 3486 temp_constraints++; 3487 total_counts++; 3488 } 3489 for (k=0;k<nnsp_size;k++) { 3490 PetscReal real_value; 3491 PetscScalar *ptr_to_data; 3492 3493 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3494 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3495 for (j=0;j<size_of_constraint;j++) { 3496 ptr_to_data[j] = array[is_indices[j]]; 3497 } 3498 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3499 /* check if array is null on the connected component */ 3500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3501 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3502 if (real_value > 0.0) { /* keep indices and values */ 3503 temp_constraints++; 3504 total_counts++; 3505 if (!idxs_copied) { 3506 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3507 idxs_copied = PETSC_TRUE; 3508 } 3509 } 3510 } 3511 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3512 valid_constraints = temp_constraints; 3513 if (!pcbddc->use_nnsp_true && temp_constraints) { 3514 if (temp_constraints == 1) { /* just normalize the constraint */ 3515 PetscScalar norm,*ptr_to_data; 3516 3517 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3519 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3520 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3521 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3522 } else { /* perform SVD */ 3523 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3524 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3525 3526 #if defined(PETSC_MISSING_LAPACK_GESVD) 3527 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3528 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3529 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3530 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3531 from that computed using LAPACKgesvd 3532 -> This is due to a different computation of eigenvectors in LAPACKheev 3533 -> The quality of the POD-computed basis will be the same */ 3534 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3535 /* Store upper triangular part of correlation matrix */ 3536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3538 for (j=0;j<temp_constraints;j++) { 3539 for (k=0;k<j+1;k++) { 3540 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3541 } 3542 } 3543 /* compute eigenvalues and eigenvectors of correlation matrix */ 3544 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3545 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3546 #if !defined(PETSC_USE_COMPLEX) 3547 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3548 #else 3549 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3550 #endif 3551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3552 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3553 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3554 j = 0; 3555 while (j < temp_constraints && singular_vals[j] < tol) j++; 3556 total_counts = total_counts-j; 3557 valid_constraints = temp_constraints-j; 3558 /* scale and copy POD basis into used quadrature memory */ 3559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3560 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3561 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3563 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3565 if (j<temp_constraints) { 3566 PetscInt ii; 3567 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3569 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3570 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3571 for (k=0;k<temp_constraints-j;k++) { 3572 for (ii=0;ii<size_of_constraint;ii++) { 3573 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3574 } 3575 } 3576 } 3577 #else /* on missing GESVD */ 3578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3579 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3581 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3582 #if !defined(PETSC_USE_COMPLEX) 3583 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3584 #else 3585 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3586 #endif 3587 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3589 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3590 k = temp_constraints; 3591 if (k > size_of_constraint) k = size_of_constraint; 3592 j = 0; 3593 while (j < k && singular_vals[k-j-1] < tol) j++; 3594 valid_constraints = k-j; 3595 total_counts = total_counts-temp_constraints+valid_constraints; 3596 #endif /* on missing GESVD */ 3597 } 3598 } 3599 /* update pointers information */ 3600 if (valid_constraints) { 3601 constraints_n[total_counts_cc] = valid_constraints; 3602 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3603 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3604 /* set change_of_basis flag */ 3605 if (boolforchange) { 3606 PetscBTSet(change_basis,total_counts_cc); 3607 } 3608 total_counts_cc++; 3609 } 3610 } 3611 /* free workspace */ 3612 if (!skip_lapack) { 3613 ierr = PetscFree(work);CHKERRQ(ierr); 3614 #if defined(PETSC_USE_COMPLEX) 3615 ierr = PetscFree(rwork);CHKERRQ(ierr); 3616 #endif 3617 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3618 #if defined(PETSC_MISSING_LAPACK_GESVD) 3619 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3620 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3621 #endif 3622 } 3623 for (k=0;k<nnsp_size;k++) { 3624 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3625 } 3626 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3627 /* free index sets of faces, edges and vertices */ 3628 for (i=0;i<n_ISForFaces;i++) { 3629 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3630 } 3631 if (n_ISForFaces) { 3632 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3633 } 3634 for (i=0;i<n_ISForEdges;i++) { 3635 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3636 } 3637 if (n_ISForEdges) { 3638 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3639 } 3640 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3641 } else { 3642 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3643 3644 total_counts = 0; 3645 n_vertices = 0; 3646 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3647 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3648 } 3649 max_constraints = 0; 3650 total_counts_cc = 0; 3651 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3652 total_counts += pcbddc->adaptive_constraints_n[i]; 3653 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3654 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3655 } 3656 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3657 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3658 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3659 constraints_data = pcbddc->adaptive_constraints_data; 3660 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3661 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3662 total_counts_cc = 0; 3663 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3664 if (pcbddc->adaptive_constraints_n[i]) { 3665 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3666 } 3667 } 3668 #if 0 3669 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3670 for (i=0;i<total_counts_cc;i++) { 3671 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3672 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3673 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3674 printf(" %d",constraints_idxs[j]); 3675 } 3676 printf("\n"); 3677 printf("number of cc: %d\n",constraints_n[i]); 3678 } 3679 for (i=0;i<n_vertices;i++) { 3680 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3681 } 3682 for (i=0;i<sub_schurs->n_subs;i++) { 3683 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3684 } 3685 #endif 3686 3687 max_size_of_constraint = 0; 3688 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3689 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3690 /* Change of basis */ 3691 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3692 if (pcbddc->use_change_of_basis) { 3693 for (i=0;i<sub_schurs->n_subs;i++) { 3694 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3695 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3696 } 3697 } 3698 } 3699 } 3700 pcbddc->local_primal_size = total_counts; 3701 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3702 3703 /* map constraints_idxs in boundary numbering */ 3704 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3705 if (i != constraints_idxs_ptr[total_counts_cc]) { 3706 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3707 } 3708 3709 /* Create constraint matrix */ 3710 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3711 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3712 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3713 3714 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3715 /* determine if a QR strategy is needed for change of basis */ 3716 qr_needed = PETSC_FALSE; 3717 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3718 total_primal_vertices=0; 3719 pcbddc->local_primal_size_cc = 0; 3720 for (i=0;i<total_counts_cc;i++) { 3721 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3722 if (size_of_constraint == 1) { 3723 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3724 pcbddc->local_primal_size_cc += 1; 3725 } else if (PetscBTLookup(change_basis,i)) { 3726 for (k=0;k<constraints_n[i];k++) { 3727 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3728 } 3729 pcbddc->local_primal_size_cc += constraints_n[i]; 3730 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3731 PetscBTSet(qr_needed_idx,i); 3732 qr_needed = PETSC_TRUE; 3733 } 3734 } else { 3735 pcbddc->local_primal_size_cc += 1; 3736 } 3737 } 3738 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3739 pcbddc->n_vertices = total_primal_vertices; 3740 /* permute indices in order to have a sorted set of vertices */ 3741 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3742 3743 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3744 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3745 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3746 3747 /* nonzero structure of constraint matrix */ 3748 /* and get reference dof for local constraints */ 3749 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3750 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3751 3752 j = total_primal_vertices; 3753 total_counts = total_primal_vertices; 3754 cum = total_primal_vertices; 3755 for (i=n_vertices;i<total_counts_cc;i++) { 3756 if (!PetscBTLookup(change_basis,i)) { 3757 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3758 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3759 cum++; 3760 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3761 for (k=0;k<constraints_n[i];k++) { 3762 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3763 nnz[j+k] = size_of_constraint; 3764 } 3765 j += constraints_n[i]; 3766 } 3767 } 3768 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3769 ierr = PetscFree(nnz);CHKERRQ(ierr); 3770 3771 /* set values in constraint matrix */ 3772 for (i=0;i<total_primal_vertices;i++) { 3773 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3774 } 3775 total_counts = total_primal_vertices; 3776 for (i=n_vertices;i<total_counts_cc;i++) { 3777 if (!PetscBTLookup(change_basis,i)) { 3778 PetscInt *cols; 3779 3780 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3781 cols = constraints_idxs+constraints_idxs_ptr[i]; 3782 for (k=0;k<constraints_n[i];k++) { 3783 PetscInt row = total_counts+k; 3784 PetscScalar *vals; 3785 3786 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3787 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3788 } 3789 total_counts += constraints_n[i]; 3790 } 3791 } 3792 /* assembling */ 3793 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 3796 /* 3797 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3798 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3799 */ 3800 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3801 if (pcbddc->use_change_of_basis) { 3802 /* dual and primal dofs on a single cc */ 3803 PetscInt dual_dofs,primal_dofs; 3804 /* working stuff for GEQRF */ 3805 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3806 PetscBLASInt lqr_work; 3807 /* working stuff for UNGQR */ 3808 PetscScalar *gqr_work,lgqr_work_t; 3809 PetscBLASInt lgqr_work; 3810 /* working stuff for TRTRS */ 3811 PetscScalar *trs_rhs; 3812 PetscBLASInt Blas_NRHS; 3813 /* pointers for values insertion into change of basis matrix */ 3814 PetscInt *start_rows,*start_cols; 3815 PetscScalar *start_vals; 3816 /* working stuff for values insertion */ 3817 PetscBT is_primal; 3818 PetscInt *aux_primal_numbering_B; 3819 /* matrix sizes */ 3820 PetscInt global_size,local_size; 3821 /* temporary change of basis */ 3822 Mat localChangeOfBasisMatrix; 3823 /* extra space for debugging */ 3824 PetscScalar *dbg_work; 3825 3826 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3827 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3828 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3829 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3830 /* nonzeros for local mat */ 3831 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3832 for (i=0;i<pcis->n;i++) nnz[i]=1; 3833 for (i=n_vertices;i<total_counts_cc;i++) { 3834 if (PetscBTLookup(change_basis,i)) { 3835 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3836 if (PetscBTLookup(qr_needed_idx,i)) { 3837 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3838 } else { 3839 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3840 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3841 } 3842 } 3843 } 3844 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3845 ierr = PetscFree(nnz);CHKERRQ(ierr); 3846 /* Set initial identity in the matrix */ 3847 for (i=0;i<pcis->n;i++) { 3848 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3849 } 3850 3851 if (pcbddc->dbg_flag) { 3852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3853 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3854 } 3855 3856 3857 /* Now we loop on the constraints which need a change of basis */ 3858 /* 3859 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3860 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3861 3862 Basic blocks of change of basis matrix T computed by 3863 3864 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3865 3866 | 1 0 ... 0 s_1/S | 3867 | 0 1 ... 0 s_2/S | 3868 | ... | 3869 | 0 ... 1 s_{n-1}/S | 3870 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3871 3872 with S = \sum_{i=1}^n s_i^2 3873 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3874 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3875 3876 - QR decomposition of constraints otherwise 3877 */ 3878 if (qr_needed) { 3879 /* space to store Q */ 3880 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3881 /* first we issue queries for optimal work */ 3882 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3883 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3885 lqr_work = -1; 3886 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3887 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3888 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3889 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3890 lgqr_work = -1; 3891 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3893 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3894 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3895 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3896 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3898 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3899 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3900 /* array to store scaling factors for reflectors */ 3901 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3902 /* array to store rhs and solution of triangular solver */ 3903 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3904 /* allocating workspace for check */ 3905 if (pcbddc->dbg_flag) { 3906 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3907 } 3908 } 3909 /* array to store whether a node is primal or not */ 3910 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3911 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3912 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3913 if (i != total_primal_vertices) { 3914 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3915 } 3916 for (i=0;i<total_primal_vertices;i++) { 3917 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3918 } 3919 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3920 3921 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3922 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3923 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3924 if (PetscBTLookup(change_basis,total_counts)) { 3925 /* get constraint info */ 3926 primal_dofs = constraints_n[total_counts]; 3927 dual_dofs = size_of_constraint-primal_dofs; 3928 3929 if (pcbddc->dbg_flag) { 3930 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3931 } 3932 3933 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3934 3935 /* copy quadrature constraints for change of basis check */ 3936 if (pcbddc->dbg_flag) { 3937 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3938 } 3939 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3940 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3941 3942 /* compute QR decomposition of constraints */ 3943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3944 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3947 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3949 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3950 3951 /* explictly compute R^-T */ 3952 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3953 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3954 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3955 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3958 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3959 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3960 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3961 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3962 3963 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3966 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3969 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3971 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3972 3973 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3974 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3975 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3982 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3983 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3984 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3985 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3986 3987 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3988 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3989 /* insert cols for primal dofs */ 3990 for (j=0;j<primal_dofs;j++) { 3991 start_vals = &qr_basis[j*size_of_constraint]; 3992 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3993 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3994 } 3995 /* insert cols for dual dofs */ 3996 for (j=0,k=0;j<dual_dofs;k++) { 3997 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3998 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3999 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4000 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4001 j++; 4002 } 4003 } 4004 4005 /* check change of basis */ 4006 if (pcbddc->dbg_flag) { 4007 PetscInt ii,jj; 4008 PetscBool valid_qr=PETSC_TRUE; 4009 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4014 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4016 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4018 for (jj=0;jj<size_of_constraint;jj++) { 4019 for (ii=0;ii<primal_dofs;ii++) { 4020 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4021 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4022 } 4023 } 4024 if (!valid_qr) { 4025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4026 for (jj=0;jj<size_of_constraint;jj++) { 4027 for (ii=0;ii<primal_dofs;ii++) { 4028 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4029 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4030 } 4031 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4032 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4033 } 4034 } 4035 } 4036 } else { 4037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4038 } 4039 } 4040 } else { /* simple transformation block */ 4041 PetscInt row,col; 4042 PetscScalar val,norm; 4043 4044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4045 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4046 for (j=0;j<size_of_constraint;j++) { 4047 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4048 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4049 if (!PetscBTLookup(is_primal,row_B)) { 4050 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4051 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4052 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4053 } else { 4054 for (k=0;k<size_of_constraint;k++) { 4055 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4056 if (row != col) { 4057 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4058 } else { 4059 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4060 } 4061 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4062 } 4063 } 4064 } 4065 if (pcbddc->dbg_flag) { 4066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4067 } 4068 } 4069 } else { 4070 if (pcbddc->dbg_flag) { 4071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4072 } 4073 } 4074 } 4075 4076 /* free workspace */ 4077 if (qr_needed) { 4078 if (pcbddc->dbg_flag) { 4079 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4080 } 4081 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4082 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4083 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4084 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4085 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4086 } 4087 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4088 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4089 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 4091 /* assembling of global change of variable */ 4092 if (!pcbddc->fake_change) { 4093 Mat tmat; 4094 PetscInt bs; 4095 4096 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4097 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4098 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4099 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4100 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4101 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4102 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4103 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4104 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4105 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4106 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4107 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4108 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4109 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4110 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4111 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4112 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4113 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4114 4115 /* check */ 4116 if (pcbddc->dbg_flag) { 4117 PetscReal error; 4118 Vec x,x_change; 4119 4120 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4121 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4122 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4123 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4124 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4125 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4126 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4127 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4128 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4129 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4130 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4131 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4134 ierr = VecDestroy(&x);CHKERRQ(ierr); 4135 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4136 } 4137 } 4138 4139 /* adapt sub_schurs computed (if any) */ 4140 if (pcbddc->use_deluxe_scaling) { 4141 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4142 if (sub_schurs->S_Ej_all) { 4143 Mat S_new,tmat; 4144 ISLocalToGlobalMapping NtoSall; 4145 IS is_all_N,is_V,is_V_Sall; 4146 const PetscScalar *array; 4147 const PetscInt *idxs_V,*idxs_all; 4148 PetscInt i,n_V; 4149 4150 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4151 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4152 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4153 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4154 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4155 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4156 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4157 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4158 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4159 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4160 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4161 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4162 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4163 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4164 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4165 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4166 for (i=0;i<n_V;i++) { 4167 PetscScalar val; 4168 PetscInt idx; 4169 4170 idx = idxs_V[i]; 4171 val = array[idxs_all[idxs_V[i]]]; 4172 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4173 } 4174 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4175 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4176 sub_schurs->S_Ej_all = S_new; 4177 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4178 if (sub_schurs->sum_S_Ej_all) { 4179 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4180 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4181 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4182 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4183 sub_schurs->sum_S_Ej_all = S_new; 4184 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4185 } 4186 if (sub_schurs->sum_S_Ej_inv_all && pcbddc->fake_change) { 4187 PetscScalar infty = PETSC_MAX_REAL; 4188 ierr = MatPtAP(sub_schurs->sum_S_Ej_inv_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4189 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 4190 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4191 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4192 sub_schurs->sum_S_Ej_inv_all = S_new; 4193 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4194 ierr = MatPtAP(sub_schurs->sum_S_Ej_tilda_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4195 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 4196 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4197 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,infty,NULL,NULL);CHKERRQ(ierr); 4198 sub_schurs->sum_S_Ej_tilda_all = S_new; 4199 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4200 } 4201 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4202 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4203 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4204 if (!pcbddc->fake_change) { 4205 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4206 } else { 4207 if (!sub_schurs->change_primal) { 4208 sub_schurs->change_primal = is_V_Sall; 4209 } else { 4210 #if defined(PETSC_USE_DEBUG) 4211 IS ist; 4212 PetscInt n; 4213 4214 ierr = ISDifference(sub_schurs->change_primal,is_V_Sall,&ist);CHKERRQ(ierr); 4215 ierr = ISGetLocalSize(ist,&n);CHKERRQ(ierr); 4216 if (n) { 4217 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4218 } 4219 ierr = ISDestroy(&ist);CHKERRQ(ierr); 4220 #endif 4221 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4222 } 4223 if (!sub_schurs->change) { 4224 ierr = KSPCreate(PETSC_COMM_SELF,&sub_schurs->change);CHKERRQ(ierr); 4225 ierr = KSPSetOperators(sub_schurs->change,tmat,tmat);CHKERRQ(ierr); 4226 #if defined(PETSC_USE_DEBUG) 4227 } else { 4228 Mat At; 4229 PetscReal norm; 4230 4231 ierr = KSPGetOperators(sub_schurs->change,&At,NULL);CHKERRQ(ierr); 4232 ierr = MatAXPY(tmat,-1.,At,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4233 ierr = MatNorm(tmat,NORM_INFINITY,&norm);CHKERRQ(ierr); 4234 if (norm > PETSC_SMALL) { 4235 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4236 } 4237 #endif 4238 } 4239 } 4240 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4241 } 4242 } 4243 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4244 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4245 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4246 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4247 } 4248 4249 /* set up change of basis context */ 4250 if (pcbddc->ChangeOfBasisMatrix) { 4251 PCBDDCChange_ctx change_ctx; 4252 4253 if (!pcbddc->new_global_mat) { 4254 PetscInt global_size,local_size; 4255 4256 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4257 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4258 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4259 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4260 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4261 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4262 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4263 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4264 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4265 } else { 4266 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4267 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4268 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4269 } 4270 if (!pcbddc->user_ChangeOfBasisMatrix) { 4271 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4272 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4273 } else { 4274 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4275 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4276 } 4277 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4278 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4279 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4280 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4281 } else { 4282 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4283 } 4284 4285 /* add pressure dofs to set of primal nodes for numbering purposes */ 4286 for (i=0;i<pcbddc->benign_n;i++) { 4287 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4288 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4289 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4290 pcbddc->local_primal_size_cc++; 4291 pcbddc->local_primal_size++; 4292 } 4293 4294 /* check if a new primal space has been introduced (also take into account benign trick) */ 4295 pcbddc->new_primal_space_local = PETSC_TRUE; 4296 if (olocal_primal_size == pcbddc->local_primal_size) { 4297 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4298 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4299 if (!pcbddc->new_primal_space_local) { 4300 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4301 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4302 } 4303 } 4304 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4305 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4306 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4307 4308 /* flush dbg viewer */ 4309 if (pcbddc->dbg_flag) { 4310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4311 } 4312 4313 /* free workspace */ 4314 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4315 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4316 if (!pcbddc->adaptive_selection) { 4317 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4318 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4319 } else { 4320 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4321 pcbddc->adaptive_constraints_idxs_ptr, 4322 pcbddc->adaptive_constraints_data_ptr, 4323 pcbddc->adaptive_constraints_idxs, 4324 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4325 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4326 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4327 } 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4333 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4334 { 4335 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4336 PC_IS *pcis = (PC_IS*)pc->data; 4337 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4338 PetscInt ierr,i,vertex_size,N; 4339 PetscViewer viewer=pcbddc->dbg_viewer; 4340 4341 PetscFunctionBegin; 4342 /* Reset previously computed graph */ 4343 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4344 /* Init local Graph struct */ 4345 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4346 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4347 4348 /* Check validity of the csr graph passed in by the user */ 4349 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4350 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4351 } 4352 4353 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4354 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4355 PetscInt *xadj,*adjncy; 4356 PetscInt nvtxs; 4357 PetscBool flg_row=PETSC_FALSE; 4358 4359 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4360 if (flg_row) { 4361 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4362 pcbddc->computed_rowadj = PETSC_TRUE; 4363 } 4364 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4365 } 4366 if (pcbddc->dbg_flag) { 4367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4368 } 4369 4370 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4371 vertex_size = 1; 4372 if (pcbddc->user_provided_isfordofs) { 4373 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4374 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4375 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4376 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4377 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4378 } 4379 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4380 pcbddc->n_ISForDofs = 0; 4381 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4382 } 4383 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4384 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4385 } else { 4386 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4387 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4388 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4389 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4390 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4391 } 4392 } 4393 } 4394 4395 /* Setup of Graph */ 4396 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4397 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4398 } 4399 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4400 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4401 } 4402 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4403 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4404 } 4405 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4406 4407 /* attach info on disconnected subdomains if present */ 4408 if (pcbddc->n_local_subs) { 4409 PetscInt *local_subs; 4410 4411 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4412 for (i=0;i<pcbddc->n_local_subs;i++) { 4413 const PetscInt *idxs; 4414 PetscInt nl,j; 4415 4416 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4417 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4418 for (j=0;j<nl;j++) { 4419 local_subs[idxs[j]] = i; 4420 } 4421 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4422 } 4423 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4424 pcbddc->mat_graph->local_subs = local_subs; 4425 } 4426 4427 /* Graph's connected components analysis */ 4428 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4429 4430 /* print some info to stdout */ 4431 if (pcbddc->dbg_flag) { 4432 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4433 } 4434 4435 /* mark topography has done */ 4436 pcbddc->recompute_topography = PETSC_FALSE; 4437 PetscFunctionReturn(0); 4438 } 4439 4440 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4441 #undef __FUNCT__ 4442 #define __FUNCT__ "PCBDDCSubsetNumbering" 4443 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4444 { 4445 PetscSF sf; 4446 PetscLayout map; 4447 const PetscInt *idxs; 4448 PetscInt *leaf_data,*root_data,*gidxs; 4449 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4450 PetscInt n_n,nlocals,start,first_index; 4451 PetscMPIInt commsize; 4452 PetscBool first_found; 4453 PetscErrorCode ierr; 4454 4455 PetscFunctionBegin; 4456 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4457 if (subset_mult) { 4458 PetscCheckSameComm(subset,1,subset_mult,2); 4459 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4460 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4461 } 4462 /* create workspace layout for computing global indices of subset */ 4463 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4464 lbounds[0] = lbounds[1] = 0; 4465 for (i=0;i<n;i++) { 4466 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4467 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4468 } 4469 lbounds[0] = -lbounds[0]; 4470 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4471 gbounds[0] = -gbounds[0]; 4472 N = gbounds[1] - gbounds[0] + 1; 4473 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4474 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4475 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4476 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4477 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4478 4479 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4480 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4481 if (subset_mult) { 4482 const PetscInt* idxs_mult; 4483 4484 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4485 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4486 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4487 } else { 4488 for (i=0;i<n;i++) leaf_data[i] = 1; 4489 } 4490 /* local size of new subset */ 4491 n_n = 0; 4492 for (i=0;i<n;i++) n_n += leaf_data[i]; 4493 4494 /* global indexes in layout */ 4495 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4496 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4497 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4498 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4499 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4500 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4501 4502 /* reduce from leaves to roots */ 4503 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4504 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4505 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4506 4507 /* count indexes in local part of layout */ 4508 nlocals = 0; 4509 first_index = -1; 4510 first_found = PETSC_FALSE; 4511 for (i=0;i<Nl;i++) { 4512 if (!first_found && root_data[i]) { 4513 first_found = PETSC_TRUE; 4514 first_index = i; 4515 } 4516 nlocals += root_data[i]; 4517 } 4518 4519 /* cumulative of number of indexes and size of subset without holes */ 4520 #if defined(PETSC_HAVE_MPI_EXSCAN) 4521 start = 0; 4522 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4523 #else 4524 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4525 start = start-nlocals; 4526 #endif 4527 4528 if (N_n) { /* compute total size of new subset if requested */ 4529 *N_n = start + nlocals; 4530 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4531 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4532 } 4533 4534 /* adapt root data with cumulative */ 4535 if (first_found) { 4536 PetscInt old_index; 4537 4538 root_data[first_index] += start; 4539 old_index = first_index; 4540 for (i=first_index+1;i<Nl;i++) { 4541 if (root_data[i]) { 4542 root_data[i] += root_data[old_index]; 4543 old_index = i; 4544 } 4545 } 4546 } 4547 4548 /* from roots to leaves */ 4549 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4550 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4551 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4552 4553 /* create new IS with global indexes without holes */ 4554 if (subset_mult) { 4555 const PetscInt* idxs_mult; 4556 PetscInt cum; 4557 4558 cum = 0; 4559 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4560 for (i=0;i<n;i++) { 4561 PetscInt j; 4562 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4563 } 4564 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4565 } else { 4566 for (i=0;i<n;i++) { 4567 gidxs[i] = leaf_data[i]-1; 4568 } 4569 } 4570 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4571 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4577 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4578 { 4579 PetscInt i,j; 4580 PetscScalar *alphas; 4581 PetscErrorCode ierr; 4582 4583 PetscFunctionBegin; 4584 /* this implements stabilized Gram-Schmidt */ 4585 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4586 for (i=0;i<n;i++) { 4587 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4588 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4589 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4590 } 4591 ierr = PetscFree(alphas);CHKERRQ(ierr); 4592 PetscFunctionReturn(0); 4593 } 4594 4595 #undef __FUNCT__ 4596 #define __FUNCT__ "MatISGetSubassemblingPattern" 4597 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4598 { 4599 IS ranks_send_to; 4600 PetscInt n_neighs,*neighs,*n_shared,**shared; 4601 PetscMPIInt size,rank,color; 4602 PetscInt *xadj,*adjncy; 4603 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4604 PetscInt i,local_size,threshold=0; 4605 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4606 PetscSubcomm subcomm; 4607 PetscErrorCode ierr; 4608 4609 PetscFunctionBegin; 4610 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4611 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4612 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4613 4614 /* Get info on mapping */ 4615 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4616 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4617 4618 /* build local CSR graph of subdomains' connectivity */ 4619 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4620 xadj[0] = 0; 4621 xadj[1] = PetscMax(n_neighs-1,0); 4622 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4623 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4624 4625 if (threshold) { 4626 PetscInt xadj_count = 0; 4627 for (i=1;i<n_neighs;i++) { 4628 if (n_shared[i] > threshold) { 4629 adjncy[xadj_count] = neighs[i]; 4630 adjncy_wgt[xadj_count] = n_shared[i]; 4631 xadj_count++; 4632 } 4633 } 4634 xadj[1] = xadj_count; 4635 } else { 4636 if (xadj[1]) { 4637 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4638 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4639 } 4640 } 4641 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4642 if (use_square) { 4643 for (i=0;i<xadj[1];i++) { 4644 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4645 } 4646 } 4647 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4648 4649 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4650 4651 /* 4652 Restrict work on active processes only. 4653 */ 4654 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4655 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4656 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4657 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4658 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4659 if (color) { 4660 ierr = PetscFree(xadj);CHKERRQ(ierr); 4661 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4662 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4663 } else { 4664 Mat subdomain_adj; 4665 IS new_ranks,new_ranks_contig; 4666 MatPartitioning partitioner; 4667 PetscInt prank,rstart=0,rend=0; 4668 PetscInt *is_indices,*oldranks; 4669 PetscBool aggregate; 4670 4671 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4673 prank = rank; 4674 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4675 /* 4676 for (i=0;i<size;i++) { 4677 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4678 } 4679 */ 4680 for (i=0;i<xadj[1];i++) { 4681 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4682 } 4683 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4684 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4685 if (aggregate) { 4686 PetscInt lrows,row,ncols,*cols; 4687 PetscMPIInt nrank; 4688 PetscScalar *vals; 4689 4690 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4691 lrows = 0; 4692 if (nrank<redprocs) { 4693 lrows = size/redprocs; 4694 if (nrank<size%redprocs) lrows++; 4695 } 4696 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4697 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4698 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4699 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4700 row = nrank; 4701 ncols = xadj[1]-xadj[0]; 4702 cols = adjncy; 4703 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4704 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4705 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4706 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4707 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4708 ierr = PetscFree(xadj);CHKERRQ(ierr); 4709 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4710 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4711 ierr = PetscFree(vals);CHKERRQ(ierr); 4712 } else { 4713 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4714 } 4715 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4716 4717 /* Partition */ 4718 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4719 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4720 if (use_vwgt) { 4721 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4722 v_wgt[0] = local_size; 4723 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4724 } 4725 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4726 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4727 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4728 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4729 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4730 4731 /* renumber new_ranks to avoid "holes" in new set of processors */ 4732 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4733 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4734 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4735 if (!redprocs) { 4736 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4737 } else { 4738 PetscInt idxs[1]; 4739 PetscMPIInt tag; 4740 MPI_Request *reqs; 4741 4742 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4743 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4744 for (i=rstart;i<rend;i++) { 4745 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4746 } 4747 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4748 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4749 ierr = PetscFree(reqs);CHKERRQ(ierr); 4750 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4751 } 4752 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4753 /* clean up */ 4754 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4755 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4756 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4757 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4758 } 4759 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4760 4761 /* assemble parallel IS for sends */ 4762 i = 1; 4763 if (color) i=0; 4764 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4765 /* get back IS */ 4766 *is_sends = ranks_send_to; 4767 PetscFunctionReturn(0); 4768 } 4769 4770 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4771 4772 #undef __FUNCT__ 4773 #define __FUNCT__ "MatISSubassemble" 4774 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4775 { 4776 Mat local_mat; 4777 IS is_sends_internal; 4778 PetscInt rows,cols,new_local_rows; 4779 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4780 PetscBool ismatis,isdense,newisdense,destroy_mat; 4781 ISLocalToGlobalMapping l2gmap; 4782 PetscInt* l2gmap_indices; 4783 const PetscInt* is_indices; 4784 MatType new_local_type; 4785 /* buffers */ 4786 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4787 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4788 PetscInt *recv_buffer_idxs_local; 4789 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4790 /* MPI */ 4791 MPI_Comm comm,comm_n; 4792 PetscSubcomm subcomm; 4793 PetscMPIInt n_sends,n_recvs,commsize; 4794 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4795 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4796 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4797 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4798 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4799 PetscErrorCode ierr; 4800 4801 PetscFunctionBegin; 4802 /* TODO: add missing checks */ 4803 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4804 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4805 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4806 PetscValidLogicalCollectiveInt(mat,nis,7); 4807 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4808 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4809 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4810 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4811 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4812 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4813 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4814 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4815 PetscInt mrows,mcols,mnrows,mncols; 4816 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4817 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4818 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4819 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4820 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4821 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4822 } 4823 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4824 PetscValidLogicalCollectiveInt(mat,bs,0); 4825 /* prepare IS for sending if not provided */ 4826 if (!is_sends) { 4827 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4828 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4829 } else { 4830 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4831 is_sends_internal = is_sends; 4832 } 4833 4834 /* get comm */ 4835 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4836 4837 /* compute number of sends */ 4838 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4839 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4840 4841 /* compute number of receives */ 4842 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4843 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4844 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4845 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4846 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4847 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4848 ierr = PetscFree(iflags);CHKERRQ(ierr); 4849 4850 /* restrict comm if requested */ 4851 subcomm = 0; 4852 destroy_mat = PETSC_FALSE; 4853 if (restrict_comm) { 4854 PetscMPIInt color,subcommsize; 4855 4856 color = 0; 4857 if (restrict_full) { 4858 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4859 } else { 4860 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4861 } 4862 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4863 subcommsize = commsize - subcommsize; 4864 /* check if reuse has been requested */ 4865 if (reuse == MAT_REUSE_MATRIX) { 4866 if (*mat_n) { 4867 PetscMPIInt subcommsize2; 4868 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4869 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4870 comm_n = PetscObjectComm((PetscObject)*mat_n); 4871 } else { 4872 comm_n = PETSC_COMM_SELF; 4873 } 4874 } else { /* MAT_INITIAL_MATRIX */ 4875 PetscMPIInt rank; 4876 4877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4878 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4879 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4880 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4881 comm_n = PetscSubcommChild(subcomm); 4882 } 4883 /* flag to destroy *mat_n if not significative */ 4884 if (color) destroy_mat = PETSC_TRUE; 4885 } else { 4886 comm_n = comm; 4887 } 4888 4889 /* prepare send/receive buffers */ 4890 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4891 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4892 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4893 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4894 if (nis) { 4895 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4896 } 4897 4898 /* Get data from local matrices */ 4899 if (!isdense) { 4900 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4901 /* TODO: See below some guidelines on how to prepare the local buffers */ 4902 /* 4903 send_buffer_vals should contain the raw values of the local matrix 4904 send_buffer_idxs should contain: 4905 - MatType_PRIVATE type 4906 - PetscInt size_of_l2gmap 4907 - PetscInt global_row_indices[size_of_l2gmap] 4908 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4909 */ 4910 } else { 4911 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4912 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4913 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4914 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4915 send_buffer_idxs[1] = i; 4916 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4917 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4918 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4919 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4920 for (i=0;i<n_sends;i++) { 4921 ilengths_vals[is_indices[i]] = len*len; 4922 ilengths_idxs[is_indices[i]] = len+2; 4923 } 4924 } 4925 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4926 /* additional is (if any) */ 4927 if (nis) { 4928 PetscMPIInt psum; 4929 PetscInt j; 4930 for (j=0,psum=0;j<nis;j++) { 4931 PetscInt plen; 4932 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4933 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4934 psum += len+1; /* indices + lenght */ 4935 } 4936 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4937 for (j=0,psum=0;j<nis;j++) { 4938 PetscInt plen; 4939 const PetscInt *is_array_idxs; 4940 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4941 send_buffer_idxs_is[psum] = plen; 4942 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4943 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4944 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4945 psum += plen+1; /* indices + lenght */ 4946 } 4947 for (i=0;i<n_sends;i++) { 4948 ilengths_idxs_is[is_indices[i]] = psum; 4949 } 4950 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4951 } 4952 4953 buf_size_idxs = 0; 4954 buf_size_vals = 0; 4955 buf_size_idxs_is = 0; 4956 for (i=0;i<n_recvs;i++) { 4957 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4958 buf_size_vals += (PetscInt)olengths_vals[i]; 4959 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4960 } 4961 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4962 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4963 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4964 4965 /* get new tags for clean communications */ 4966 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4967 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4968 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4969 4970 /* allocate for requests */ 4971 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4972 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4973 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4974 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4975 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4976 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4977 4978 /* communications */ 4979 ptr_idxs = recv_buffer_idxs; 4980 ptr_vals = recv_buffer_vals; 4981 ptr_idxs_is = recv_buffer_idxs_is; 4982 for (i=0;i<n_recvs;i++) { 4983 source_dest = onodes[i]; 4984 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4985 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4986 ptr_idxs += olengths_idxs[i]; 4987 ptr_vals += olengths_vals[i]; 4988 if (nis) { 4989 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4990 ptr_idxs_is += olengths_idxs_is[i]; 4991 } 4992 } 4993 for (i=0;i<n_sends;i++) { 4994 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4995 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4996 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4997 if (nis) { 4998 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4999 } 5000 } 5001 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5002 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5003 5004 /* assemble new l2g map */ 5005 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5006 ptr_idxs = recv_buffer_idxs; 5007 new_local_rows = 0; 5008 for (i=0;i<n_recvs;i++) { 5009 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5010 ptr_idxs += olengths_idxs[i]; 5011 } 5012 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5013 ptr_idxs = recv_buffer_idxs; 5014 new_local_rows = 0; 5015 for (i=0;i<n_recvs;i++) { 5016 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5017 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5018 ptr_idxs += olengths_idxs[i]; 5019 } 5020 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5021 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5022 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5023 5024 /* infer new local matrix type from received local matrices type */ 5025 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5026 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5027 if (n_recvs) { 5028 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5029 ptr_idxs = recv_buffer_idxs; 5030 for (i=0;i<n_recvs;i++) { 5031 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5032 new_local_type_private = MATAIJ_PRIVATE; 5033 break; 5034 } 5035 ptr_idxs += olengths_idxs[i]; 5036 } 5037 switch (new_local_type_private) { 5038 case MATDENSE_PRIVATE: 5039 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5040 new_local_type = MATSEQAIJ; 5041 bs = 1; 5042 } else { /* if I receive only 1 dense matrix */ 5043 new_local_type = MATSEQDENSE; 5044 bs = 1; 5045 } 5046 break; 5047 case MATAIJ_PRIVATE: 5048 new_local_type = MATSEQAIJ; 5049 bs = 1; 5050 break; 5051 case MATBAIJ_PRIVATE: 5052 new_local_type = MATSEQBAIJ; 5053 break; 5054 case MATSBAIJ_PRIVATE: 5055 new_local_type = MATSEQSBAIJ; 5056 break; 5057 default: 5058 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5059 break; 5060 } 5061 } else { /* by default, new_local_type is seqdense */ 5062 new_local_type = MATSEQDENSE; 5063 bs = 1; 5064 } 5065 5066 /* create MATIS object if needed */ 5067 if (reuse == MAT_INITIAL_MATRIX) { 5068 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5069 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5070 } else { 5071 /* it also destroys the local matrices */ 5072 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5073 } 5074 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5075 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5076 5077 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5078 5079 /* Global to local map of received indices */ 5080 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5081 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5082 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5083 5084 /* restore attributes -> type of incoming data and its size */ 5085 buf_size_idxs = 0; 5086 for (i=0;i<n_recvs;i++) { 5087 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5088 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5089 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5090 } 5091 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5092 5093 /* set preallocation */ 5094 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5095 if (!newisdense) { 5096 PetscInt *new_local_nnz=0; 5097 5098 ptr_vals = recv_buffer_vals; 5099 ptr_idxs = recv_buffer_idxs_local; 5100 if (n_recvs) { 5101 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5102 } 5103 for (i=0;i<n_recvs;i++) { 5104 PetscInt j; 5105 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5106 for (j=0;j<*(ptr_idxs+1);j++) { 5107 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5108 } 5109 } else { 5110 /* TODO */ 5111 } 5112 ptr_idxs += olengths_idxs[i]; 5113 } 5114 if (new_local_nnz) { 5115 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5116 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5117 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5118 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5119 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5120 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5121 } else { 5122 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5123 } 5124 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5125 } else { 5126 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5127 } 5128 5129 /* set values */ 5130 ptr_vals = recv_buffer_vals; 5131 ptr_idxs = recv_buffer_idxs_local; 5132 for (i=0;i<n_recvs;i++) { 5133 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5134 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5135 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5136 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5137 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5138 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5139 } else { 5140 /* TODO */ 5141 } 5142 ptr_idxs += olengths_idxs[i]; 5143 ptr_vals += olengths_vals[i]; 5144 } 5145 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5146 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5147 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5148 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5149 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5150 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5151 5152 #if 0 5153 if (!restrict_comm) { /* check */ 5154 Vec lvec,rvec; 5155 PetscReal infty_error; 5156 5157 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5158 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5159 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5160 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5161 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5162 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5163 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5164 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5165 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5166 } 5167 #endif 5168 5169 /* assemble new additional is (if any) */ 5170 if (nis) { 5171 PetscInt **temp_idxs,*count_is,j,psum; 5172 5173 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5174 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5175 ptr_idxs = recv_buffer_idxs_is; 5176 psum = 0; 5177 for (i=0;i<n_recvs;i++) { 5178 for (j=0;j<nis;j++) { 5179 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5180 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5181 psum += plen; 5182 ptr_idxs += plen+1; /* shift pointer to received data */ 5183 } 5184 } 5185 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5186 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5187 for (i=1;i<nis;i++) { 5188 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5189 } 5190 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5191 ptr_idxs = recv_buffer_idxs_is; 5192 for (i=0;i<n_recvs;i++) { 5193 for (j=0;j<nis;j++) { 5194 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5195 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5196 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5197 ptr_idxs += plen+1; /* shift pointer to received data */ 5198 } 5199 } 5200 for (i=0;i<nis;i++) { 5201 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5202 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5203 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5204 } 5205 ierr = PetscFree(count_is);CHKERRQ(ierr); 5206 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5207 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5208 } 5209 /* free workspace */ 5210 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5211 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5212 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5213 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5214 if (isdense) { 5215 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5216 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5217 } else { 5218 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5219 } 5220 if (nis) { 5221 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5222 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5223 } 5224 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5225 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5226 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5227 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5228 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5229 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5230 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5231 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5232 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5233 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5234 ierr = PetscFree(onodes);CHKERRQ(ierr); 5235 if (nis) { 5236 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5237 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5238 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5239 } 5240 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5241 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5242 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5243 for (i=0;i<nis;i++) { 5244 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5245 } 5246 *mat_n = NULL; 5247 } 5248 PetscFunctionReturn(0); 5249 } 5250 5251 /* temporary hack into ksp private data structure */ 5252 #include <petsc/private/kspimpl.h> 5253 5254 #undef __FUNCT__ 5255 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5256 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5257 { 5258 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5259 PC_IS *pcis = (PC_IS*)pc->data; 5260 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5261 MatNullSpace CoarseNullSpace=NULL; 5262 ISLocalToGlobalMapping coarse_islg; 5263 IS coarse_is,*isarray; 5264 PetscInt i,im_active=-1,active_procs=-1; 5265 PetscInt nis,nisdofs,nisneu,nisvert; 5266 PC pc_temp; 5267 PCType coarse_pc_type; 5268 KSPType coarse_ksp_type; 5269 PetscBool multilevel_requested,multilevel_allowed; 5270 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5271 Mat t_coarse_mat_is; 5272 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5273 PetscMPIInt all_procs; 5274 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5275 PetscBool compute_vecs = PETSC_FALSE; 5276 PetscScalar *array; 5277 PetscErrorCode ierr; 5278 5279 PetscFunctionBegin; 5280 /* Assign global numbering to coarse dofs */ 5281 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5282 PetscInt ocoarse_size; 5283 compute_vecs = PETSC_TRUE; 5284 ocoarse_size = pcbddc->coarse_size; 5285 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5286 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5287 /* see if we can avoid some work */ 5288 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5289 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5290 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5291 PC pc; 5292 PetscBool isbddc; 5293 5294 /* temporary workaround since PCBDDC does not have a reset method so far */ 5295 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5296 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5297 if (isbddc) { 5298 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5299 } else { 5300 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5301 } 5302 coarse_reuse = PETSC_FALSE; 5303 } else { /* we can safely reuse already computed coarse matrix */ 5304 coarse_reuse = PETSC_TRUE; 5305 } 5306 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5307 coarse_reuse = PETSC_FALSE; 5308 } 5309 /* reset any subassembling information */ 5310 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5311 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5312 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5313 coarse_reuse = PETSC_TRUE; 5314 } 5315 5316 /* count "active" (i.e. with positive local size) and "void" processes */ 5317 im_active = !!(pcis->n); 5318 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5320 void_procs = all_procs-active_procs; 5321 csin_type_simple = PETSC_TRUE; 5322 redist = PETSC_FALSE; 5323 if (pcbddc->current_level && void_procs) { 5324 csin_ml = PETSC_TRUE; 5325 ncoarse_ml = void_procs; 5326 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5327 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5328 csin_ds = PETSC_TRUE; 5329 ncoarse_ds = pcbddc->redistribute_coarse; 5330 redist = PETSC_TRUE; 5331 } else { 5332 csin_ds = PETSC_TRUE; 5333 ncoarse_ds = active_procs; 5334 redist = PETSC_TRUE; 5335 } 5336 } else { 5337 csin_ml = PETSC_FALSE; 5338 ncoarse_ml = all_procs; 5339 if (void_procs) { 5340 csin_ds = PETSC_TRUE; 5341 ncoarse_ds = void_procs; 5342 csin_type_simple = PETSC_FALSE; 5343 } else { 5344 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5345 csin_ds = PETSC_TRUE; 5346 ncoarse_ds = pcbddc->redistribute_coarse; 5347 redist = PETSC_TRUE; 5348 } else { 5349 csin_ds = PETSC_FALSE; 5350 ncoarse_ds = all_procs; 5351 } 5352 } 5353 } 5354 5355 /* 5356 test if we can go multilevel: three conditions must be satisfied: 5357 - we have not exceeded the number of levels requested 5358 - we can actually subassemble the active processes 5359 - we can find a suitable number of MPI processes where we can place the subassembled problem 5360 */ 5361 multilevel_allowed = PETSC_FALSE; 5362 multilevel_requested = PETSC_FALSE; 5363 if (pcbddc->current_level < pcbddc->max_levels) { 5364 multilevel_requested = PETSC_TRUE; 5365 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5366 multilevel_allowed = PETSC_FALSE; 5367 } else { 5368 multilevel_allowed = PETSC_TRUE; 5369 } 5370 } 5371 /* determine number of process partecipating to coarse solver */ 5372 if (multilevel_allowed) { 5373 ncoarse = ncoarse_ml; 5374 csin = csin_ml; 5375 redist = PETSC_FALSE; 5376 } else { 5377 ncoarse = ncoarse_ds; 5378 csin = csin_ds; 5379 } 5380 5381 /* creates temporary l2gmap and IS for coarse indexes */ 5382 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5383 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5384 5385 /* creates temporary MATIS object for coarse matrix */ 5386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5387 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5388 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5389 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5390 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5391 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5392 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5393 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5394 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5395 5396 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5397 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5398 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5399 const PetscInt *idxs; 5400 ISLocalToGlobalMapping tmap; 5401 5402 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5403 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5404 /* allocate space for temporary storage */ 5405 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5406 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5407 /* allocate for IS array */ 5408 nisdofs = pcbddc->n_ISForDofsLocal; 5409 nisneu = !!pcbddc->NeumannBoundariesLocal; 5410 nisvert = 0; 5411 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5412 nisvert = 1; 5413 } 5414 nis = nisdofs + nisneu + nisvert; 5415 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5416 /* dofs splitting */ 5417 for (i=0;i<nisdofs;i++) { 5418 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5419 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5420 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5421 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5422 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5423 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5424 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5425 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5426 } 5427 /* neumann boundaries */ 5428 if (pcbddc->NeumannBoundariesLocal) { 5429 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5430 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5431 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5432 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5433 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5434 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5435 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5436 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5437 } 5438 /* primal vertices (benign) */ 5439 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5440 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5441 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5442 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5443 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5444 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5445 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5446 } 5447 /* free memory */ 5448 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5449 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5450 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5451 } else { 5452 nis = 0; 5453 nisdofs = 0; 5454 nisneu = 0; 5455 nisvert = 0; 5456 isarray = NULL; 5457 } 5458 /* destroy no longer needed map */ 5459 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5460 5461 /* restrict on coarse candidates (if needed) */ 5462 coarse_mat_is = NULL; 5463 if (csin) { 5464 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5465 if (redist) { 5466 PetscMPIInt rank; 5467 PetscInt spc,n_spc_p1,dest[1],destsize; 5468 5469 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5470 spc = active_procs/ncoarse; 5471 n_spc_p1 = active_procs%ncoarse; 5472 if (im_active) { 5473 destsize = 1; 5474 if (rank > n_spc_p1*(spc+1)-1) { 5475 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5476 } else { 5477 dest[0] = rank/(spc+1); 5478 } 5479 } else { 5480 destsize = 0; 5481 } 5482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5483 } else if (csin_type_simple) { 5484 PetscMPIInt rank; 5485 PetscInt issize,isidx; 5486 5487 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5488 if (im_active) { 5489 issize = 1; 5490 isidx = (PetscInt)rank; 5491 } else { 5492 issize = 0; 5493 isidx = -1; 5494 } 5495 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5496 } else { /* get a suitable subassembling pattern from MATIS code */ 5497 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5498 } 5499 5500 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5501 if (!redist || ncoarse <= void_procs) { 5502 PetscInt ncoarse_cand,tissize,*nisindices; 5503 PetscInt *coarse_candidates; 5504 const PetscInt* tisindices; 5505 5506 /* get coarse candidates' ranks in pc communicator */ 5507 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5508 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5509 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5510 if (!coarse_candidates[i]) { 5511 coarse_candidates[ncoarse_cand++]=i; 5512 } 5513 } 5514 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5515 5516 5517 if (pcbddc->dbg_flag) { 5518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5520 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5522 for (i=0;i<ncoarse_cand;i++) { 5523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5524 } 5525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5527 } 5528 /* shift the pattern on coarse candidates */ 5529 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5530 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5531 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5532 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5533 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5534 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5535 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5536 } 5537 if (pcbddc->dbg_flag) { 5538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5539 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5540 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5542 } 5543 } 5544 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5545 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5546 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5547 } else { /* this is the last level, so use just receiving processes in subcomm */ 5548 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5549 } 5550 } else { 5551 if (pcbddc->dbg_flag) { 5552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5553 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5555 } 5556 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5557 coarse_mat_is = t_coarse_mat_is; 5558 } 5559 5560 /* create local to global scatters for coarse problem */ 5561 if (compute_vecs) { 5562 PetscInt lrows; 5563 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5564 if (coarse_mat_is) { 5565 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5566 } else { 5567 lrows = 0; 5568 } 5569 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5570 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5571 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5572 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5573 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5574 } 5575 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5576 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5577 5578 /* set defaults for coarse KSP and PC */ 5579 if (multilevel_allowed) { 5580 coarse_ksp_type = KSPRICHARDSON; 5581 coarse_pc_type = PCBDDC; 5582 } else { 5583 coarse_ksp_type = KSPPREONLY; 5584 coarse_pc_type = PCREDUNDANT; 5585 } 5586 5587 /* print some info if requested */ 5588 if (pcbddc->dbg_flag) { 5589 if (!multilevel_allowed) { 5590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5591 if (multilevel_requested) { 5592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5593 } else if (pcbddc->max_levels) { 5594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5595 } 5596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5597 } 5598 } 5599 5600 /* create the coarse KSP object only once with defaults */ 5601 if (coarse_mat_is) { 5602 MatReuse coarse_mat_reuse; 5603 PetscViewer dbg_viewer = NULL; 5604 if (pcbddc->dbg_flag) { 5605 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5606 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5607 } 5608 if (!pcbddc->coarse_ksp) { 5609 char prefix[256],str_level[16]; 5610 size_t len; 5611 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5612 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5613 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5614 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5615 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5616 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5617 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5618 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5619 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5620 /* prefix */ 5621 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5622 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5623 if (!pcbddc->current_level) { 5624 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5625 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5626 } else { 5627 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5628 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5629 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5630 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5631 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5632 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5633 } 5634 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5635 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5636 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5637 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5638 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5639 /* allow user customization */ 5640 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5641 } 5642 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5643 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5644 if (nisdofs) { 5645 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5646 for (i=0;i<nisdofs;i++) { 5647 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5648 } 5649 } 5650 if (nisneu) { 5651 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5652 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5653 } 5654 if (nisvert) { 5655 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5656 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5657 } 5658 5659 /* get some info after set from options */ 5660 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5661 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5662 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5663 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5664 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5665 isbddc = PETSC_FALSE; 5666 } 5667 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5668 if (isredundant) { 5669 KSP inner_ksp; 5670 PC inner_pc; 5671 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5672 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5673 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5674 } 5675 5676 /* assemble coarse matrix */ 5677 if (coarse_reuse) { 5678 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5679 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5680 coarse_mat_reuse = MAT_REUSE_MATRIX; 5681 } else { 5682 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5683 } 5684 if (isbddc || isnn) { 5685 if (isbddc) { /* currently there's no API for this */ 5686 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5687 pcbddc->detect_disconnected = PETSC_TRUE; 5688 } 5689 if (pcbddc->coarsening_ratio > 1) { 5690 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5691 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5692 if (pcbddc->dbg_flag) { 5693 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5694 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5695 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5696 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5697 } 5698 } 5699 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5700 } else { 5701 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5702 coarse_mat = coarse_mat_is; 5703 } 5704 } else { 5705 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5706 } 5707 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5708 5709 /* propagate symmetry info of coarse matrix */ 5710 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5711 if (pc->pmat->symmetric_set) { 5712 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5713 } 5714 if (pc->pmat->hermitian_set) { 5715 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5716 } 5717 if (pc->pmat->spd_set) { 5718 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5719 } 5720 /* set operators */ 5721 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5722 if (pcbddc->dbg_flag) { 5723 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5724 } 5725 } else { /* processes non partecipating to coarse solver (if any) */ 5726 coarse_mat = 0; 5727 } 5728 ierr = PetscFree(isarray);CHKERRQ(ierr); 5729 #if 0 5730 { 5731 PetscViewer viewer; 5732 char filename[256]; 5733 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5734 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5735 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5736 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5737 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5738 } 5739 #endif 5740 5741 /* Compute coarse null space (special handling by BDDC only) */ 5742 #if 0 5743 if (pcbddc->NullSpace) { 5744 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5745 } 5746 #endif 5747 /* hack */ 5748 if (pcbddc->coarse_ksp) { 5749 Vec crhs,csol; 5750 5751 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5752 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5753 if (!csol) { 5754 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5755 } 5756 if (!crhs) { 5757 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5758 } 5759 } 5760 5761 /* compute null space for coarse solver if the benign trick has been requested */ 5762 if (pcbddc->benign_null) { 5763 5764 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5765 for (i=0;i<pcbddc->benign_n;i++) { 5766 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5767 } 5768 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5769 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5770 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5771 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5772 if (coarse_mat) { 5773 Vec nullv; 5774 PetscScalar *array,*array2; 5775 PetscInt nl; 5776 5777 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5778 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5779 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5780 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5781 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5782 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5783 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5784 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5785 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5786 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5787 } 5788 } 5789 5790 if (pcbddc->coarse_ksp) { 5791 PetscBool ispreonly; 5792 5793 if (CoarseNullSpace) { 5794 PetscBool isnull; 5795 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5796 if (0) { 5797 if (isbddc && !pcbddc->benign_saddle_point) { 5798 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5799 } else { 5800 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5801 } 5802 } else { 5803 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5804 } 5805 } 5806 /* setup coarse ksp */ 5807 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5808 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5809 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5810 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5811 KSP check_ksp; 5812 KSPType check_ksp_type; 5813 PC check_pc; 5814 Vec check_vec,coarse_vec; 5815 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5816 PetscInt its; 5817 PetscBool compute_eigs; 5818 PetscReal *eigs_r,*eigs_c; 5819 PetscInt neigs; 5820 const char *prefix; 5821 5822 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5823 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5824 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5825 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5826 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5827 if (ispreonly) { 5828 check_ksp_type = KSPPREONLY; 5829 compute_eigs = PETSC_FALSE; 5830 } else { 5831 check_ksp_type = KSPGMRES; 5832 compute_eigs = PETSC_TRUE; 5833 } 5834 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5835 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5836 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5837 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5838 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5839 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5840 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5841 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5842 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5843 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5844 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5845 /* create random vec */ 5846 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5847 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5848 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5849 if (CoarseNullSpace) { 5850 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5851 } 5852 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5853 /* solve coarse problem */ 5854 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5855 if (CoarseNullSpace) { 5856 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5857 } 5858 /* set eigenvalue estimation if preonly has not been requested */ 5859 if (compute_eigs) { 5860 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5861 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5862 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5863 lambda_max = eigs_r[neigs-1]; 5864 lambda_min = eigs_r[0]; 5865 if (pcbddc->use_coarse_estimates) { 5866 if (lambda_max>lambda_min) { 5867 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5868 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5869 } 5870 } 5871 } 5872 5873 /* check coarse problem residual error */ 5874 if (pcbddc->dbg_flag) { 5875 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5876 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5877 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5878 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5879 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5880 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5881 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5882 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5883 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5884 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5885 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5886 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5887 if (CoarseNullSpace) { 5888 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5889 } 5890 if (compute_eigs) { 5891 PetscReal lambda_max_s,lambda_min_s; 5892 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5893 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5894 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5895 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5896 for (i=0;i<neigs;i++) { 5897 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5898 } 5899 } 5900 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5901 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5902 } 5903 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5904 if (compute_eigs) { 5905 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5906 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5907 } 5908 } 5909 } 5910 /* print additional info */ 5911 if (pcbddc->dbg_flag) { 5912 /* waits until all processes reaches this point */ 5913 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5914 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5916 } 5917 5918 /* free memory */ 5919 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5920 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5921 PetscFunctionReturn(0); 5922 } 5923 5924 #undef __FUNCT__ 5925 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5926 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5927 { 5928 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5929 PC_IS* pcis = (PC_IS*)pc->data; 5930 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5931 IS subset,subset_mult,subset_n; 5932 PetscInt local_size,coarse_size=0; 5933 PetscInt *local_primal_indices=NULL; 5934 const PetscInt *t_local_primal_indices; 5935 PetscErrorCode ierr; 5936 5937 PetscFunctionBegin; 5938 /* Compute global number of coarse dofs */ 5939 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5941 } 5942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5943 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5944 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5945 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5946 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5947 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5948 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5949 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5950 if (local_size != pcbddc->local_primal_size) { 5951 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5952 } 5953 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5954 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5955 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5956 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5957 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5958 5959 /* check numbering */ 5960 if (pcbddc->dbg_flag) { 5961 PetscScalar coarsesum,*array,*array2; 5962 PetscInt i; 5963 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5964 5965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5967 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5968 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5969 /* counter */ 5970 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5971 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5972 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5973 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5974 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5975 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5976 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5977 for (i=0;i<pcbddc->local_primal_size;i++) { 5978 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5979 } 5980 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5981 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5982 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5983 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5984 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5985 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5986 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5987 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5988 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5989 for (i=0;i<pcis->n;i++) { 5990 if (array[i] != 0.0 && array[i] != array2[i]) { 5991 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5992 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5993 set_error = PETSC_TRUE; 5994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5995 } 5996 } 5997 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5998 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6000 for (i=0;i<pcis->n;i++) { 6001 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6002 } 6003 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6004 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6005 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6006 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6007 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6008 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6009 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6010 PetscInt *gidxs; 6011 6012 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6013 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6014 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6015 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6017 for (i=0;i<pcbddc->local_primal_size;i++) { 6018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6019 } 6020 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6021 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6022 } 6023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6024 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6025 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6026 } 6027 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6028 /* get back data */ 6029 *coarse_size_n = coarse_size; 6030 *local_primal_indices_n = local_primal_indices; 6031 PetscFunctionReturn(0); 6032 } 6033 6034 #undef __FUNCT__ 6035 #define __FUNCT__ "PCBDDCGlobalToLocal" 6036 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6037 { 6038 IS localis_t; 6039 PetscInt i,lsize,*idxs,n; 6040 PetscScalar *vals; 6041 PetscErrorCode ierr; 6042 6043 PetscFunctionBegin; 6044 /* get indices in local ordering exploiting local to global map */ 6045 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6046 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6047 for (i=0;i<lsize;i++) vals[i] = 1.0; 6048 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6049 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6050 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6051 if (idxs) { /* multilevel guard */ 6052 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6053 } 6054 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6055 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6056 ierr = PetscFree(vals);CHKERRQ(ierr); 6057 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6058 /* now compute set in local ordering */ 6059 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6060 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6061 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6062 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6063 for (i=0,lsize=0;i<n;i++) { 6064 if (PetscRealPart(vals[i]) > 0.5) { 6065 lsize++; 6066 } 6067 } 6068 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6069 for (i=0,lsize=0;i<n;i++) { 6070 if (PetscRealPart(vals[i]) > 0.5) { 6071 idxs[lsize++] = i; 6072 } 6073 } 6074 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6075 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6076 *localis = localis_t; 6077 PetscFunctionReturn(0); 6078 } 6079 6080 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6081 #undef __FUNCT__ 6082 #define __FUNCT__ "PCBDDCMatMult_Private" 6083 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6084 { 6085 PCBDDCChange_ctx change_ctx; 6086 PetscErrorCode ierr; 6087 6088 PetscFunctionBegin; 6089 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6090 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6091 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6092 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6093 PetscFunctionReturn(0); 6094 } 6095 6096 #undef __FUNCT__ 6097 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6098 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6099 { 6100 PCBDDCChange_ctx change_ctx; 6101 PetscErrorCode ierr; 6102 6103 PetscFunctionBegin; 6104 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6105 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6106 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6107 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6108 PetscFunctionReturn(0); 6109 } 6110 6111 #undef __FUNCT__ 6112 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6113 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6114 { 6115 PC_IS *pcis=(PC_IS*)pc->data; 6116 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6117 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6118 Mat S_j; 6119 PetscInt *used_xadj,*used_adjncy; 6120 PetscBool free_used_adj; 6121 PetscErrorCode ierr; 6122 6123 PetscFunctionBegin; 6124 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6125 free_used_adj = PETSC_FALSE; 6126 if (pcbddc->sub_schurs_layers == -1) { 6127 used_xadj = NULL; 6128 used_adjncy = NULL; 6129 } else { 6130 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6131 used_xadj = pcbddc->mat_graph->xadj; 6132 used_adjncy = pcbddc->mat_graph->adjncy; 6133 } else if (pcbddc->computed_rowadj) { 6134 used_xadj = pcbddc->mat_graph->xadj; 6135 used_adjncy = pcbddc->mat_graph->adjncy; 6136 } else { 6137 PetscBool flg_row=PETSC_FALSE; 6138 const PetscInt *xadj,*adjncy; 6139 PetscInt nvtxs; 6140 6141 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6142 if (flg_row) { 6143 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6144 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6145 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6146 free_used_adj = PETSC_TRUE; 6147 } else { 6148 pcbddc->sub_schurs_layers = -1; 6149 used_xadj = NULL; 6150 used_adjncy = NULL; 6151 } 6152 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6153 } 6154 } 6155 6156 /* setup sub_schurs data */ 6157 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6158 if (!sub_schurs->schur_explicit) { 6159 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6160 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6161 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6162 } else { 6163 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6164 PetscBool isseqaij; 6165 PetscInt benign_n; 6166 6167 if (!pcbddc->use_vertices && reuse_solvers) { 6168 PetscInt n_vertices; 6169 6170 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6171 reuse_solvers = (PetscBool)!n_vertices; 6172 } 6173 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6174 if (!isseqaij) { 6175 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6176 if (matis->A == pcbddc->local_mat) { 6177 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6178 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6179 } else { 6180 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6181 } 6182 } 6183 if (!pcbddc->benign_change_explicit) { 6184 benign_n = pcbddc->benign_n; 6185 } else { 6186 benign_n = 0; 6187 } 6188 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6189 /* if the velocities are in the original basis, then we need to eliminate the no-net-flux from the S_Ej 6190 we assume that the user has passed in the associated quadrature weights in the near-null space attached to pc->pmat 6191 We need to compute the change of basis according to the quadrature weights, and this could not be done, at the moment, without some hacking */ 6192 if (pcbddc->benign_saddle_point && !pcbddc->use_change_of_basis && !pcbddc->user_ChangeOfBasisMatrix) { 6193 PC_IS *pcisf; 6194 PC_BDDC *pcbddcf; 6195 PC pcf; 6196 6197 if (pcbddc->sub_schurs_rebuild) { 6198 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6199 } 6200 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6201 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6202 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6203 /* hacks */ 6204 pcisf = (PC_IS*)pcf->data; 6205 pcbddcf = (PC_BDDC*)pcf->data; 6206 *pcisf = *pcis; 6207 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6208 pcbddcf->mat_graph = pcbddc->mat_graph; 6209 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6210 pcbddcf->sub_schurs = sub_schurs; 6211 pcbddcf->use_faces = PETSC_TRUE; 6212 pcbddcf->use_change_of_basis = PETSC_TRUE; 6213 pcbddcf->use_change_on_faces = PETSC_TRUE; 6214 pcbddcf->use_deluxe_scaling = PETSC_TRUE; 6215 pcbddcf->fake_change = PETSC_TRUE; 6216 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6217 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6218 pcbddcf->sub_schurs = NULL; 6219 ierr = MatDestroy(&pcbddcf->ConstraintMatrix);CHKERRQ(ierr); 6220 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6221 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6222 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6223 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6224 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6225 pcf->ops->destroy = NULL; 6226 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6227 } 6228 } 6229 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6230 6231 /* free adjacency */ 6232 if (free_used_adj) { 6233 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6234 } 6235 PetscFunctionReturn(0); 6236 } 6237 6238 #undef __FUNCT__ 6239 #define __FUNCT__ "PCBDDCInitSubSchurs" 6240 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6241 { 6242 PC_IS *pcis=(PC_IS*)pc->data; 6243 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6244 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6245 PCBDDCGraph graph; 6246 PetscErrorCode ierr; 6247 6248 PetscFunctionBegin; 6249 /* attach interface graph for determining subsets */ 6250 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6251 IS verticesIS,verticescomm; 6252 PetscInt vsize,*idxs; 6253 6254 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6255 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6256 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6257 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6258 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6259 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6260 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6261 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6262 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6263 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6264 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6265 /* 6266 if (pcbddc->dbg_flag) { 6267 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6268 } 6269 */ 6270 } else { 6271 graph = pcbddc->mat_graph; 6272 } 6273 6274 /* sub_schurs init */ 6275 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6276 6277 /* free graph struct */ 6278 if (pcbddc->sub_schurs_rebuild) { 6279 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6280 } 6281 PetscFunctionReturn(0); 6282 } 6283 6284 #undef __FUNCT__ 6285 #define __FUNCT__ "PCBDDCCheckOperator" 6286 PetscErrorCode PCBDDCCheckOperator(PC pc) 6287 { 6288 PC_IS *pcis=(PC_IS*)pc->data; 6289 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6290 PetscErrorCode ierr; 6291 6292 PetscFunctionBegin; 6293 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6294 IS zerodiag = NULL; 6295 Mat S_j,B0_B=NULL; 6296 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6297 PetscScalar *p0_check,*array,*array2; 6298 PetscReal norm; 6299 PetscInt i; 6300 6301 /* B0 and B0_B */ 6302 if (zerodiag) { 6303 IS dummy; 6304 6305 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6306 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6307 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6308 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6309 } 6310 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6311 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6312 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6313 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6314 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6315 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6316 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6317 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6318 /* S_j */ 6319 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6320 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6321 6322 /* mimic vector in \widetilde{W}_\Gamma */ 6323 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6324 /* continuous in primal space */ 6325 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6326 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6327 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6328 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6329 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6330 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6331 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6332 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6333 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6334 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6335 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6336 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6337 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6338 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6339 6340 /* assemble rhs for coarse problem */ 6341 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6342 /* local with Schur */ 6343 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6344 if (zerodiag) { 6345 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6346 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6347 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6348 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6349 } 6350 /* sum on primal nodes the local contributions */ 6351 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6352 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6353 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6354 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6355 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6356 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6357 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6358 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6359 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6360 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6361 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6362 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6363 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6364 /* scale primal nodes (BDDC sums contibutions) */ 6365 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6366 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6367 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6368 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6369 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6370 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6371 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6372 /* global: \widetilde{B0}_B w_\Gamma */ 6373 if (zerodiag) { 6374 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6375 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6376 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6377 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6378 } 6379 /* BDDC */ 6380 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6381 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6382 6383 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6384 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6385 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6386 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6387 for (i=0;i<pcbddc->benign_n;i++) { 6388 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6389 } 6390 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6391 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6392 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6393 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6394 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6395 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6396 } 6397 PetscFunctionReturn(0); 6398 } 6399