1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1001 } 1002 1003 if (mss) { /* multilevel */ 1004 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1006 } 1007 1008 thresh = pcbddc->adaptive_threshold; 1009 for (i=0;i<sub_schurs->n_subs;i++) { 1010 const PetscInt *idxs; 1011 PetscReal infty = PETSC_MAX_REAL; 1012 PetscInt j,subset_size,eigs_start = 0; 1013 PetscBLASInt B_N; 1014 PetscBool same_data = PETSC_FALSE; 1015 1016 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1017 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1018 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1019 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1020 if (sub_schurs->is_hermitian) { 1021 PetscInt j,k; 1022 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1023 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1024 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 } 1026 for (j=0;j<subset_size;j++) { 1027 for (k=j;k<subset_size;k++) { 1028 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1029 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1030 } 1031 } 1032 } else { 1033 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1034 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 } 1036 } else { 1037 S = Sarray + cumarray; 1038 St = Starray + cumarray; 1039 } 1040 /* see if we can save some work */ 1041 if (sub_schurs->n_subs == 1) { 1042 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1043 } 1044 1045 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1046 B_neigs = 0; 1047 } else { 1048 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1049 PetscBLASInt B_itype = 1; 1050 PetscBLASInt B_IL, B_IU; 1051 PetscReal eps = -1.0; /* dlamch? */ 1052 PetscInt nmin_s; 1053 1054 if (pcbddc->dbg_flag) { 1055 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1056 } 1057 1058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1059 if (thresh > 1.+PETSC_SMALL) { 1060 1061 /* ask for eigenvalues larger than thresh */ 1062 #if defined(PETSC_USE_COMPLEX) 1063 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1064 #else 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1066 #endif 1067 } else { 1068 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1069 B_IL = 1; 1070 #if defined(PETSC_USE_COMPLEX) 1071 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1072 #else 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1074 #endif 1075 } 1076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1077 if (B_ierr) { 1078 if (B_ierr < 0 ) { 1079 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1080 } else if (B_ierr <= B_N) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1082 } else { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1084 } 1085 } 1086 1087 if (B_neigs > nmax) { 1088 if (pcbddc->dbg_flag) { 1089 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1090 } 1091 eigs_start = B_neigs -nmax; 1092 B_neigs = nmax; 1093 } 1094 1095 nmin_s = PetscMin(nmin,B_N); 1096 if (B_neigs < nmin_s) { 1097 PetscBLASInt B_neigs2; 1098 1099 B_IU = B_N - B_neigs; 1100 B_IL = B_N - nmin_s + 1; 1101 if (pcbddc->dbg_flag) { 1102 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1103 } 1104 if (sub_schurs->is_hermitian) { 1105 PetscInt j; 1106 for (j=0;j<subset_size;j++) { 1107 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1108 } 1109 for (j=0;j<subset_size;j++) { 1110 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1114 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1115 } 1116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1117 #if defined(PETSC_USE_COMPLEX) 1118 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1119 #else 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1121 #endif 1122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1123 B_neigs += B_neigs2; 1124 } 1125 if (B_ierr) { 1126 if (B_ierr < 0 ) { 1127 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1128 } else if (B_ierr <= B_N) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1130 } else { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1132 } 1133 } 1134 if (pcbddc->dbg_flag) { 1135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1136 for (j=0;j<B_neigs;j++) { 1137 if (eigs[j] == 0.0) { 1138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1141 } 1142 } 1143 } 1144 } else { 1145 /* TODO */ 1146 } 1147 } 1148 /* change the basis back to the original one */ 1149 if (sub_schurs->change) { 1150 Mat change,change_subs,phi,phit; 1151 IS issubs; 1152 1153 if (pcbddc->dbg_flag > 1) { 1154 PetscInt ii; 1155 for (ii=0;ii<B_neigs;ii++) { 1156 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1157 for (j=0;j<B_N;j++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1159 } 1160 } 1161 } 1162 ierr = ISCreateStride(PETSC_COMM_SELF,subset_size,cum2,1,&issubs);CHKERRQ(ierr); 1163 ierr = KSPGetOperators(sub_schurs->change,&change,NULL);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrix(change,issubs,issubs,MAT_INITIAL_MATRIX,&change_subs);CHKERRQ(ierr); 1165 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1166 ierr = MatMatMult(change_subs,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1167 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1168 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1169 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1170 ierr = MatDestroy(&change_subs);CHKERRQ(ierr); 1171 ierr = ISDestroy(&issubs);CHKERRQ(ierr); 1172 } 1173 maxneigs = PetscMax(B_neigs,maxneigs); 1174 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1175 if (B_neigs) { 1176 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1177 1178 if (pcbddc->dbg_flag > 1) { 1179 PetscInt ii; 1180 for (ii=0;ii<B_neigs;ii++) { 1181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1182 for (j=0;j<B_N;j++) { 1183 #if defined(PETSC_USE_COMPLEX) 1184 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1185 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1187 #else 1188 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1189 #endif 1190 } 1191 } 1192 } 1193 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1194 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1195 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1196 cum++; 1197 } 1198 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1199 /* shift for next computation */ 1200 cumarray += subset_size*subset_size; 1201 cum2 += subset_size; 1202 } 1203 if (pcbddc->dbg_flag) { 1204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1205 } 1206 1207 if (mss) { 1208 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1209 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1210 /* destroy matrices (junk) */ 1211 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1212 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1213 } 1214 if (allocated_S_St) { 1215 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1216 } 1217 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1218 #if defined(PETSC_USE_COMPLEX) 1219 ierr = PetscFree(rwork);CHKERRQ(ierr); 1220 #endif 1221 if (pcbddc->dbg_flag) { 1222 PetscInt maxneigs_r; 1223 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "PCBDDCSetUpSolvers" 1231 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1232 { 1233 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1234 PetscScalar *coarse_submat_vals; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1239 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1240 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1241 1242 /* Setup local neumann solver ksp_R */ 1243 /* PCBDDCSetUpLocalScatters should be called first! */ 1244 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1245 1246 /* Change global null space passed in by the user if change of basis has been requested */ 1247 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1248 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1249 } 1250 1251 /* 1252 Setup local correction and local part of coarse basis. 1253 Gives back the dense local part of the coarse matrix in column major ordering 1254 */ 1255 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1256 1257 /* Compute total number of coarse nodes and setup coarse solver */ 1258 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1259 1260 /* free */ 1261 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCBDDCResetCustomization" 1267 PetscErrorCode PCBDDCResetCustomization(PC pc) 1268 { 1269 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1274 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1275 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1276 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1278 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1279 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1280 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1281 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1282 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1283 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1284 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "PCBDDCResetTopography" 1290 PetscErrorCode PCBDDCResetTopography(PC pc) 1291 { 1292 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1293 PetscInt i; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1298 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1299 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1300 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1301 for (i=0;i<pcbddc->n_local_subs;i++) { 1302 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1305 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "PCBDDCResetSolvers" 1311 PetscErrorCode PCBDDCResetSolvers(PC pc) 1312 { 1313 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1318 if (pcbddc->coarse_phi_B) { 1319 PetscScalar *array; 1320 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1321 ierr = PetscFree(array);CHKERRQ(ierr); 1322 } 1323 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1324 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1325 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1326 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1327 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1328 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1329 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1330 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1331 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1332 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1333 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1334 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1335 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1336 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1337 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1338 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1339 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1341 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1342 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1343 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1346 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1347 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1348 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1349 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1350 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1351 if (pcbddc->benign_zerodiag_subs) { 1352 PetscInt i; 1353 for (i=0;i<pcbddc->benign_n;i++) { 1354 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1355 } 1356 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1357 } 1358 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1364 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1365 { 1366 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1367 PC_IS *pcis = (PC_IS*)pc->data; 1368 VecType impVecType; 1369 PetscInt n_constraints,n_R,old_size; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 if (!pcbddc->ConstraintMatrix) { 1374 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1375 } 1376 /* get sizes */ 1377 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1378 n_R = pcis->n - pcbddc->n_vertices; 1379 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1380 /* local work vectors (try to avoid unneeded work)*/ 1381 /* R nodes */ 1382 old_size = -1; 1383 if (pcbddc->vec1_R) { 1384 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1385 } 1386 if (n_R != old_size) { 1387 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1388 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1389 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1390 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1391 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1392 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1393 } 1394 /* local primal dofs */ 1395 old_size = -1; 1396 if (pcbddc->vec1_P) { 1397 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1398 } 1399 if (pcbddc->local_primal_size != old_size) { 1400 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1401 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1402 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1403 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1404 } 1405 /* local explicit constraints */ 1406 old_size = -1; 1407 if (pcbddc->vec1_C) { 1408 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1409 } 1410 if (n_constraints && n_constraints != old_size) { 1411 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1412 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1413 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1414 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "PCBDDCSetUpCorrection" 1421 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1422 { 1423 PetscErrorCode ierr; 1424 /* pointers to pcis and pcbddc */ 1425 PC_IS* pcis = (PC_IS*)pc->data; 1426 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1427 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1428 /* submatrices of local problem */ 1429 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1430 /* submatrices for benign trick */ 1431 Mat B0_R = NULL, B0_V = NULL; 1432 /* submatrices of local coarse problem */ 1433 Mat S_VV,S_CV,S_VC,S_CC; 1434 /* working matrices */ 1435 Mat C_CR; 1436 /* additional working stuff */ 1437 PC pc_R; 1438 Mat F; 1439 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1440 1441 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1442 PetscScalar *work; 1443 PetscInt *idx_V_B; 1444 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1445 PetscInt i,n_R,n_D,n_B; 1446 1447 /* some shortcuts to scalars */ 1448 PetscScalar one=1.0,m_one=-1.0; 1449 1450 PetscFunctionBegin; 1451 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1452 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1453 } 1454 1455 /* Set Non-overlapping dimensions */ 1456 n_vertices = pcbddc->n_vertices; 1457 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1458 n_B = pcis->n_B; 1459 n_D = pcis->n - n_B; 1460 n_R = pcis->n - n_vertices; 1461 1462 /* vertices in boundary numbering */ 1463 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1464 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1465 if (i != n_vertices) { 1466 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1467 } 1468 1469 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1470 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1471 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1472 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1474 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1475 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1476 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1477 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1478 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1479 1480 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1481 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1482 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1483 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1484 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1485 lda_rhs = n_R; 1486 need_benign_correction = PETSC_FALSE; 1487 if (isLU || isILU || isCHOL) { 1488 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1489 } else if (sub_schurs->reuse_solver) { 1490 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1491 MatFactorType type; 1492 1493 F = reuse_solver->F; 1494 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1495 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1496 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1497 need_benign_correction = !!reuse_solver->benign_n; 1498 } else { 1499 F = NULL; 1500 } 1501 1502 /* allocate workspace */ 1503 n = 0; 1504 if (n_constraints) { 1505 n += lda_rhs*n_constraints; 1506 } 1507 if (n_vertices) { 1508 n = PetscMax(2*lda_rhs*n_vertices,n); 1509 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1510 } 1511 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1512 1513 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1514 if (n_constraints) { 1515 Mat M1,M2,M3; 1516 Mat auxmat; 1517 IS is_aux; 1518 PetscScalar *array,*array2; 1519 1520 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1521 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1522 1523 /* Extract constraints on R nodes: C_{CR} */ 1524 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1525 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1526 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1527 1528 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1529 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1530 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1531 for (i=0;i<n_constraints;i++) { 1532 const PetscScalar *row_cmat_values; 1533 const PetscInt *row_cmat_indices; 1534 PetscInt size_of_constraint,j; 1535 1536 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1537 for (j=0;j<size_of_constraint;j++) { 1538 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1539 } 1540 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1541 } 1542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1543 if (F) { 1544 Mat B; 1545 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1547 if (need_benign_correction) { 1548 PetscScalar *marr; 1549 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1550 1551 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1552 for (i=0;i<n_constraints;i++) { 1553 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1554 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1556 } 1557 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1558 } 1559 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1560 if (need_benign_correction) { 1561 PetscScalar *marr; 1562 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1563 1564 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1565 for (i=0;i<n_constraints;i++) { 1566 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1567 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1568 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1569 } 1570 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1571 } 1572 ierr = MatDestroy(&B);CHKERRQ(ierr); 1573 } else { 1574 PetscScalar *marr; 1575 1576 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1577 for (i=0;i<n_constraints;i++) { 1578 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1579 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1580 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1581 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1582 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1583 } 1584 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1585 } 1586 if (!pcbddc->switch_static) { 1587 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1588 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1589 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 for (i=0;i<n_constraints;i++) { 1591 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1592 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1596 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1597 } 1598 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1599 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1600 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1601 } else { 1602 if (lda_rhs != n_R) { 1603 IS dummy; 1604 1605 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1607 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1608 } else { 1609 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1610 pcbddc->local_auxmat2 = local_auxmat2_R; 1611 } 1612 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1613 } 1614 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1615 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1616 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1617 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1618 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1619 if (isCHOL) { 1620 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1623 } 1624 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1625 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1626 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1627 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1628 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1629 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1630 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1631 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1632 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1633 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1634 } 1635 1636 /* Extract B0_R */ 1637 if (pcbddc->benign_n) { 1638 IS dummy; 1639 PetscReal norm; 1640 1641 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1642 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1643 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1644 if (norm < PETSC_SMALL) { 1645 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1646 } 1647 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1648 } 1649 1650 /* Get submatrices from subdomain matrix */ 1651 if (n_vertices) { 1652 IS is_aux; 1653 1654 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1655 IS tis; 1656 1657 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1658 ierr = ISSort(tis);CHKERRQ(ierr); 1659 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1660 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1661 } else { 1662 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1663 } 1664 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1665 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1666 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1667 if (pcbddc->benign_n) { 1668 IS dummy; 1669 1670 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1671 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1672 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1673 } 1674 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1675 } 1676 1677 /* Matrix of coarse basis functions (local) */ 1678 if (pcbddc->coarse_phi_B) { 1679 PetscInt on_B,on_primal,on_D=n_D; 1680 if (pcbddc->coarse_phi_D) { 1681 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1682 } 1683 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1684 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1685 PetscScalar *marray; 1686 1687 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1688 ierr = PetscFree(marray);CHKERRQ(ierr); 1689 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1690 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1691 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1692 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1693 } 1694 } 1695 1696 if (!pcbddc->coarse_phi_B) { 1697 PetscScalar *marray; 1698 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 n += n_D*pcbddc->local_primal_size; 1702 } 1703 if (!pcbddc->symmetric_primal) { 1704 n *= 2; 1705 } 1706 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1707 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1708 n = n_B*pcbddc->local_primal_size; 1709 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1710 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1711 n += n_D*pcbddc->local_primal_size; 1712 } 1713 if (!pcbddc->symmetric_primal) { 1714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1715 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1716 n = n_B*pcbddc->local_primal_size; 1717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1718 } 1719 } else { 1720 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1721 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1722 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1723 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1724 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1725 } 1726 } 1727 } 1728 1729 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1730 p0_lidx_I = NULL; 1731 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1732 const PetscInt *idxs; 1733 1734 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1735 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1736 for (i=0;i<pcbddc->benign_n;i++) { 1737 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1738 } 1739 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1740 } 1741 1742 /* vertices */ 1743 if (n_vertices) { 1744 1745 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1746 1747 if (n_R) { 1748 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1749 PetscBLASInt B_N,B_one = 1; 1750 PetscScalar *x,*y; 1751 PetscBool isseqaij; 1752 1753 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1754 if (need_benign_correction) { 1755 ISLocalToGlobalMapping RtoN; 1756 IS is_p0; 1757 PetscInt *idxs_p0,n; 1758 1759 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1760 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1761 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1762 if (n != pcbddc->benign_n) { 1763 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1764 } 1765 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1766 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1767 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1768 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1769 } 1770 1771 if (lda_rhs == n_R) { 1772 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 } else { 1774 PetscScalar *av,*array; 1775 const PetscInt *xadj,*adjncy; 1776 PetscInt n; 1777 PetscBool flg_row; 1778 1779 array = work+lda_rhs*n_vertices; 1780 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1781 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1782 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1784 for (i=0;i<n;i++) { 1785 PetscInt j; 1786 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1787 } 1788 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1789 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1790 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1791 } 1792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1793 if (F) { 1794 if (need_benign_correction) { 1795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1796 PetscScalar *marr; 1797 1798 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1799 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1800 1801 | 0 0 0 | (V) 1802 L = | 0 0 -1 | (P-p0) 1803 | 0 0 -1 | (p0) 1804 1805 */ 1806 for (i=0;i<reuse_solver->benign_n;i++) { 1807 const PetscScalar *vals; 1808 const PetscInt *idxs,*idxs_zero; 1809 PetscInt n,j,nz; 1810 1811 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1812 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1813 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1814 for (j=0;j<n;j++) { 1815 PetscScalar val = vals[j]; 1816 PetscInt k,col = idxs[j]; 1817 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1818 } 1819 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1820 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1821 } 1822 /* need to correct the rhs */ 1823 for (i=0;i<n_vertices;i++) { 1824 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1825 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1827 } 1828 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1829 } 1830 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1831 /* need to correct the solution */ 1832 if (need_benign_correction) { 1833 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1834 PetscScalar *marr; 1835 1836 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1840 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1841 } 1842 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1843 } 1844 } else { 1845 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1846 for (i=0;i<n_vertices;i++) { 1847 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1848 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1849 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1850 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1851 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1852 } 1853 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1854 } 1855 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1856 /* S_VV and S_CV */ 1857 if (n_constraints) { 1858 Mat B; 1859 1860 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1861 for (i=0;i<n_vertices;i++) { 1862 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1863 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1867 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1868 } 1869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1870 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1871 ierr = MatDestroy(&B);CHKERRQ(ierr); 1872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1873 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1874 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1876 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1877 ierr = MatDestroy(&B);CHKERRQ(ierr); 1878 } 1879 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1880 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1881 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1882 } 1883 if (lda_rhs != n_R) { 1884 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1885 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1886 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1887 } 1888 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1889 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1890 if (need_benign_correction) { 1891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1892 PetscScalar *marr,*sums; 1893 1894 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1895 ierr = MatDenseGetArray(S_VVt,&marr); 1896 for (i=0;i<reuse_solver->benign_n;i++) { 1897 const PetscScalar *vals; 1898 const PetscInt *idxs,*idxs_zero; 1899 PetscInt n,j,nz; 1900 1901 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1902 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1903 for (j=0;j<n_vertices;j++) { 1904 PetscInt k; 1905 sums[j] = 0.; 1906 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1907 } 1908 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1909 for (j=0;j<n;j++) { 1910 PetscScalar val = vals[j]; 1911 PetscInt k; 1912 for (k=0;k<n_vertices;k++) { 1913 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1914 } 1915 } 1916 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1917 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1918 } 1919 ierr = PetscFree(sums);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&marr); 1921 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1922 } 1923 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1924 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1925 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1926 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1927 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1928 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1929 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1930 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1931 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1932 } else { 1933 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1934 } 1935 for (i=0;i<pcbddc->benign_n;i++) { 1936 const PetscScalar *vals; 1937 const PetscInt *idxs; 1938 PetscInt n,j,primal_idx; 1939 1940 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1941 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1942 for (j=0;j<n;j++) { 1943 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1944 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1945 } 1946 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1947 } 1948 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1949 1950 /* coarse basis functions */ 1951 for (i=0;i<n_vertices;i++) { 1952 PetscScalar *y; 1953 1954 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1955 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1956 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1957 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1959 y[n_B*i+idx_V_B[i]] = 1.0; 1960 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1962 1963 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1964 PetscInt j; 1965 1966 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1967 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1968 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1969 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1971 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1972 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1973 } 1974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1975 } 1976 /* if n_R == 0 the object is not destroyed */ 1977 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1978 } 1979 1980 if (n_constraints) { 1981 Mat B; 1982 1983 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1984 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1985 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1986 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1987 if (n_vertices) { 1988 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1989 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1990 } else { 1991 Mat S_VCt; 1992 1993 if (lda_rhs != n_R) { 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1996 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1997 } 1998 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1999 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2000 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2001 } 2002 } 2003 ierr = MatDestroy(&B);CHKERRQ(ierr); 2004 /* coarse basis functions */ 2005 for (i=0;i<n_constraints;i++) { 2006 PetscScalar *y; 2007 2008 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2014 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2015 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2016 PetscInt j; 2017 2018 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2019 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2020 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2021 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2023 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2024 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2025 } 2026 if (B0_R) { 2027 Vec benign_proj; 2028 const PetscScalar *array; 2029 PetscInt j; 2030 2031 ierr = MatCreateVecs(B0_R,NULL,&benign_proj);CHKERRQ(ierr); 2032 ierr = MatMult(B0_R,pcbddc->vec1_R,benign_proj);CHKERRQ(ierr); 2033 ierr = VecGetArrayRead(benign_proj,&array);CHKERRQ(ierr); 2034 for (j=0;j<pcbddc->benign_n;j++) { 2035 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2036 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i+n_vertices] = array[j]; 2037 coarse_submat_vals[(i+n_vertices)*pcbddc->local_primal_size+primal_idx] = array[j]; 2038 } 2039 ierr = VecRestoreArrayRead(benign_proj,&array);CHKERRQ(ierr); 2040 ierr = VecDestroy(&benign_proj);CHKERRQ(ierr); 2041 } 2042 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2043 } 2044 } 2045 if (n_constraints) { 2046 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2047 } 2048 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2049 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2050 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 2051 2052 /* compute other basis functions for non-symmetric problems */ 2053 if (!pcbddc->symmetric_primal) { 2054 Mat B_V=NULL,B_C=NULL; 2055 PetscScalar *marray; 2056 2057 if (n_constraints) { 2058 Mat S_CCT,C_CRT; 2059 2060 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2061 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2062 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2063 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2064 if (n_vertices) { 2065 Mat S_VCT; 2066 2067 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2068 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2069 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2070 } 2071 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2072 } 2073 if (n_vertices && n_R) { 2074 PetscScalar *av,*marray; 2075 const PetscInt *xadj,*adjncy; 2076 PetscInt n; 2077 PetscBool flg_row; 2078 2079 /* B_V = B_V - A_VR^T */ 2080 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2081 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2082 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2083 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2084 for (i=0;i<n;i++) { 2085 PetscInt j; 2086 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2087 } 2088 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2089 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2090 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2091 } 2092 2093 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2094 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2095 for (i=0;i<n_vertices;i++) { 2096 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2097 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2098 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2100 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2101 } 2102 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2103 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2104 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2105 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2106 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2107 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2108 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2109 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2110 } 2111 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2112 /* coarse basis functions */ 2113 for (i=0;i<pcbddc->local_primal_size;i++) { 2114 PetscScalar *y; 2115 2116 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2117 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2118 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2119 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 if (i<n_vertices) { 2122 y[n_B*i+idx_V_B[i]] = 1.0; 2123 } 2124 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2125 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2126 2127 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2128 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2129 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2130 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2133 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2134 } 2135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2136 } 2137 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2138 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2139 } 2140 /* free memory */ 2141 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2142 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2143 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2144 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2145 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2146 ierr = PetscFree(work);CHKERRQ(ierr); 2147 if (n_vertices) { 2148 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2149 } 2150 if (n_constraints) { 2151 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2152 } 2153 /* Checking coarse_sub_mat and coarse basis functios */ 2154 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2155 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2156 if (pcbddc->dbg_flag) { 2157 Mat coarse_sub_mat; 2158 Mat AUXMAT,TM1,TM2,TM3,TM4; 2159 Mat coarse_phi_D,coarse_phi_B; 2160 Mat coarse_psi_D,coarse_psi_B; 2161 Mat A_II,A_BB,A_IB,A_BI; 2162 Mat C_B,CPHI; 2163 IS is_dummy; 2164 Vec mones; 2165 MatType checkmattype=MATSEQAIJ; 2166 PetscReal real_value; 2167 2168 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2169 Mat A; 2170 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2171 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2172 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2173 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2174 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2175 ierr = MatDestroy(&A);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2178 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2179 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2180 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2181 } 2182 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2183 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2184 if (!pcbddc->symmetric_primal) { 2185 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2186 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2187 } 2188 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2189 2190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2192 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2193 if (!pcbddc->symmetric_primal) { 2194 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2195 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2196 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2197 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2198 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2199 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2200 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2201 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2202 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2203 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2204 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2205 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2206 } else { 2207 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2208 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2209 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2210 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2211 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2212 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2213 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2214 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2215 } 2216 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2217 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2218 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2219 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2220 if (pcbddc->benign_n) { 2221 Mat B0_B,B0_BPHI; 2222 PetscScalar *data,*data2; 2223 PetscInt j; 2224 2225 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2226 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2227 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2228 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2229 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2230 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2231 for (j=0;j<pcbddc->benign_n;j++) { 2232 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2233 for (i=0;i<pcbddc->local_primal_size;i++) { 2234 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2235 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2236 } 2237 } 2238 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2239 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2240 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2241 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2242 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2243 } 2244 #if 0 2245 { 2246 PetscViewer viewer; 2247 char filename[256]; 2248 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2249 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2250 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2252 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2253 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2254 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2255 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2256 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2257 if (pcbddc->coarse_phi_D) { 2258 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2259 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2260 } 2261 if (pcbddc->coarse_psi_B) { 2262 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2263 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2264 } 2265 if (pcbddc->coarse_phi_B) { 2266 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2267 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2268 } 2269 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2270 } 2271 #endif 2272 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2273 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2274 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2276 2277 /* check constraints */ 2278 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2279 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2280 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2281 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2282 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2283 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2284 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2285 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2287 if (!pcbddc->symmetric_primal) { 2288 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2289 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2290 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2291 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2293 } 2294 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2295 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2296 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2297 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2298 } 2299 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2300 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2301 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2302 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2303 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2304 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2305 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2306 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2307 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2308 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2309 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2310 if (!pcbddc->symmetric_primal) { 2311 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2312 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2313 } 2314 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2315 } 2316 /* get back data */ 2317 *coarse_submat_vals_n = coarse_submat_vals; 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2323 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2324 { 2325 Mat *work_mat; 2326 IS isrow_s,iscol_s; 2327 PetscBool rsorted,csorted; 2328 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2333 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2334 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2335 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2336 2337 if (!rsorted) { 2338 const PetscInt *idxs; 2339 PetscInt *idxs_sorted,i; 2340 2341 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2342 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2343 for (i=0;i<rsize;i++) { 2344 idxs_perm_r[i] = i; 2345 } 2346 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2347 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2348 for (i=0;i<rsize;i++) { 2349 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2350 } 2351 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2352 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2353 } else { 2354 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2355 isrow_s = isrow; 2356 } 2357 2358 if (!csorted) { 2359 if (isrow == iscol) { 2360 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2361 iscol_s = isrow_s; 2362 } else { 2363 const PetscInt *idxs; 2364 PetscInt *idxs_sorted,i; 2365 2366 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2367 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2368 for (i=0;i<csize;i++) { 2369 idxs_perm_c[i] = i; 2370 } 2371 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2372 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2373 for (i=0;i<csize;i++) { 2374 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2375 } 2376 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2377 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2378 } 2379 } else { 2380 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2381 iscol_s = iscol; 2382 } 2383 2384 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2385 2386 if (!rsorted || !csorted) { 2387 Mat new_mat; 2388 IS is_perm_r,is_perm_c; 2389 2390 if (!rsorted) { 2391 PetscInt *idxs_r,i; 2392 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2393 for (i=0;i<rsize;i++) { 2394 idxs_r[idxs_perm_r[i]] = i; 2395 } 2396 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2397 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2398 } else { 2399 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2400 } 2401 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2402 2403 if (!csorted) { 2404 if (isrow_s == iscol_s) { 2405 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2406 is_perm_c = is_perm_r; 2407 } else { 2408 PetscInt *idxs_c,i; 2409 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2410 for (i=0;i<csize;i++) { 2411 idxs_c[idxs_perm_c[i]] = i; 2412 } 2413 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2414 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2415 } 2416 } else { 2417 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2418 } 2419 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2420 2421 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2422 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2423 work_mat[0] = new_mat; 2424 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2425 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2426 } 2427 2428 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2429 *B = work_mat[0]; 2430 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2431 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2432 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2438 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2439 { 2440 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2441 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2442 Mat new_mat; 2443 IS is_local,is_global; 2444 PetscInt local_size; 2445 PetscBool isseqaij; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2450 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2451 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2452 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2453 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2454 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2455 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2456 2457 /* check */ 2458 if (pcbddc->dbg_flag) { 2459 Vec x,x_change; 2460 PetscReal error; 2461 2462 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2463 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2464 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2465 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2466 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2467 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2468 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2469 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2470 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2471 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2472 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2474 ierr = VecDestroy(&x);CHKERRQ(ierr); 2475 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2476 } 2477 2478 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2479 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2480 if (isseqaij) { 2481 Mat M; 2482 2483 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2484 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2485 ierr = MatDestroy(&M);CHKERRQ(ierr); 2486 } else { 2487 Mat work_mat,M; 2488 2489 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2490 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2491 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2492 ierr = MatDestroy(&M);CHKERRQ(ierr); 2493 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2494 } 2495 if (matis->A->symmetric_set) { 2496 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2497 #if !defined(PETSC_USE_COMPLEX) 2498 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2499 #endif 2500 } 2501 /* 2502 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2503 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2504 */ 2505 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2511 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2512 { 2513 PC_IS* pcis = (PC_IS*)(pc->data); 2514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2515 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2516 PetscInt *idx_R_local=NULL; 2517 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2518 PetscInt vbs,bs; 2519 PetscBT bitmask=NULL; 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 /* 2524 No need to setup local scatters if 2525 - primal space is unchanged 2526 AND 2527 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2528 AND 2529 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2530 */ 2531 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2532 PetscFunctionReturn(0); 2533 } 2534 /* destroy old objects */ 2535 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2536 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2537 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2538 /* Set Non-overlapping dimensions */ 2539 n_B = pcis->n_B; 2540 n_D = pcis->n - n_B; 2541 n_vertices = pcbddc->n_vertices; 2542 2543 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2544 2545 /* create auxiliary bitmask and allocate workspace */ 2546 if (!sub_schurs->reuse_solver) { 2547 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2548 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2549 for (i=0;i<n_vertices;i++) { 2550 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2551 } 2552 2553 for (i=0, n_R=0; i<pcis->n; i++) { 2554 if (!PetscBTLookup(bitmask,i)) { 2555 idx_R_local[n_R++] = i; 2556 } 2557 } 2558 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2559 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2560 2561 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2562 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2563 } 2564 2565 /* Block code */ 2566 vbs = 1; 2567 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2568 if (bs>1 && !(n_vertices%bs)) { 2569 PetscBool is_blocked = PETSC_TRUE; 2570 PetscInt *vary; 2571 if (!sub_schurs->reuse_solver) { 2572 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2573 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2574 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2575 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2576 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2577 for (i=0; i<pcis->n/bs; i++) { 2578 if (vary[i]!=0 && vary[i]!=bs) { 2579 is_blocked = PETSC_FALSE; 2580 break; 2581 } 2582 } 2583 ierr = PetscFree(vary);CHKERRQ(ierr); 2584 } else { 2585 /* Verify directly the R set */ 2586 for (i=0; i<n_R/bs; i++) { 2587 PetscInt j,node=idx_R_local[bs*i]; 2588 for (j=1; j<bs; j++) { 2589 if (node != idx_R_local[bs*i+j]-j) { 2590 is_blocked = PETSC_FALSE; 2591 break; 2592 } 2593 } 2594 } 2595 } 2596 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2597 vbs = bs; 2598 for (i=0;i<n_R/vbs;i++) { 2599 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2600 } 2601 } 2602 } 2603 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2604 if (sub_schurs->reuse_solver) { 2605 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2606 2607 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2608 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2609 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2610 reuse_solver->is_R = pcbddc->is_R_local; 2611 } else { 2612 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2613 } 2614 2615 /* print some info if requested */ 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2618 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2619 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2623 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2624 } 2625 2626 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2627 if (!sub_schurs->reuse_solver) { 2628 IS is_aux1,is_aux2; 2629 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2630 2631 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2632 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2633 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2634 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2635 for (i=0; i<n_D; i++) { 2636 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2637 } 2638 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2639 for (i=0, j=0; i<n_R; i++) { 2640 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2641 aux_array1[j++] = i; 2642 } 2643 } 2644 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2645 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2646 for (i=0, j=0; i<n_B; i++) { 2647 if (!PetscBTLookup(bitmask,is_indices[i])) { 2648 aux_array2[j++] = i; 2649 } 2650 } 2651 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2652 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2653 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2654 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2655 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2656 2657 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2658 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2659 for (i=0, j=0; i<n_R; i++) { 2660 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2661 aux_array1[j++] = i; 2662 } 2663 } 2664 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2665 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2666 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2667 } 2668 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2669 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2670 } else { 2671 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2672 IS tis; 2673 PetscInt schur_size; 2674 2675 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2676 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2677 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2678 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2679 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2680 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2681 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2682 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2683 } 2684 } 2685 PetscFunctionReturn(0); 2686 } 2687 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2691 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2692 { 2693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2694 PC_IS *pcis = (PC_IS*)pc->data; 2695 PC pc_temp; 2696 Mat A_RR; 2697 MatReuse reuse; 2698 PetscScalar m_one = -1.0; 2699 PetscReal value; 2700 PetscInt n_D,n_R; 2701 PetscBool use_exact,use_exact_reduced,issbaij; 2702 PetscErrorCode ierr; 2703 /* prefixes stuff */ 2704 char dir_prefix[256],neu_prefix[256],str_level[16]; 2705 size_t len; 2706 2707 PetscFunctionBegin; 2708 2709 /* compute prefixes */ 2710 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2711 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2712 if (!pcbddc->current_level) { 2713 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2714 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2715 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2716 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2717 } else { 2718 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2719 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2720 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2721 len -= 15; /* remove "pc_bddc_coarse_" */ 2722 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2723 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2724 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2725 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2726 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2727 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2728 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2729 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2730 } 2731 2732 /* DIRICHLET PROBLEM */ 2733 if (dirichlet) { 2734 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2735 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2736 if (!sub_schurs->reuse_solver) { 2737 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2738 } 2739 if (pcbddc->dbg_flag) { 2740 Mat A_IIn; 2741 2742 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2743 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2744 pcis->A_II = A_IIn; 2745 } 2746 } 2747 if (pcbddc->local_mat->symmetric_set) { 2748 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2749 } 2750 /* Matrix for Dirichlet problem is pcis->A_II */ 2751 n_D = pcis->n - pcis->n_B; 2752 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2753 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2754 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2755 /* default */ 2756 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2757 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2758 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2759 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2760 if (issbaij) { 2761 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2762 } else { 2763 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2764 } 2765 /* Allow user's customization */ 2766 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2767 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2768 } 2769 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2770 if (sub_schurs->reuse_solver) { 2771 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2772 2773 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2774 } 2775 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2776 if (!n_D) { 2777 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2778 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2779 } 2780 /* Set Up KSP for Dirichlet problem of BDDC */ 2781 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2782 /* set ksp_D into pcis data */ 2783 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2784 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2785 pcis->ksp_D = pcbddc->ksp_D; 2786 } 2787 2788 /* NEUMANN PROBLEM */ 2789 A_RR = 0; 2790 if (neumann) { 2791 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2792 PetscInt ibs,mbs; 2793 PetscBool issbaij; 2794 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2795 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2796 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2797 if (pcbddc->ksp_R) { /* already created ksp */ 2798 PetscInt nn_R; 2799 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2800 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2801 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2802 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2803 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2804 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2805 reuse = MAT_INITIAL_MATRIX; 2806 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2807 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2808 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2809 reuse = MAT_INITIAL_MATRIX; 2810 } else { /* safe to reuse the matrix */ 2811 reuse = MAT_REUSE_MATRIX; 2812 } 2813 } 2814 /* last check */ 2815 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2816 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2817 reuse = MAT_INITIAL_MATRIX; 2818 } 2819 } else { /* first time, so we need to create the matrix */ 2820 reuse = MAT_INITIAL_MATRIX; 2821 } 2822 /* extract A_RR */ 2823 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2824 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2825 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2826 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2827 if (matis->A == pcbddc->local_mat) { 2828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2829 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2830 } else { 2831 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2832 } 2833 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2834 if (matis->A == pcbddc->local_mat) { 2835 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2836 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2837 } else { 2838 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2839 } 2840 } 2841 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2842 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2843 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2844 } else { 2845 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2846 } 2847 if (pcbddc->local_mat->symmetric_set) { 2848 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2849 } 2850 if (!pcbddc->ksp_R) { /* create object if not present */ 2851 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2852 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2853 /* default */ 2854 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2855 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2856 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2857 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2858 if (issbaij) { 2859 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2860 } else { 2861 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2862 } 2863 /* Allow user's customization */ 2864 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2865 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2866 } 2867 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2868 if (!n_R) { 2869 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2870 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2871 } 2872 /* Reuse solver if it is present */ 2873 if (sub_schurs->reuse_solver) { 2874 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2875 2876 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2877 } 2878 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2879 /* Set Up KSP for Neumann problem of BDDC */ 2880 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2881 } 2882 /* free Neumann problem's matrix */ 2883 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2884 2885 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2886 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2887 if (pcbddc->dbg_flag) { 2888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2889 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2891 } 2892 if (dirichlet) { /* Dirichlet */ 2893 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2894 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2895 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2896 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2897 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2898 /* need to be adapted? */ 2899 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2900 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2901 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2902 /* print info */ 2903 if (pcbddc->dbg_flag) { 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2906 } 2907 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2908 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2909 } 2910 } 2911 if (neumann) { /* Neumann */ 2912 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2913 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2914 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2915 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2916 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2917 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2918 /* need to be adapted? */ 2919 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2920 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2921 /* print info */ 2922 if (pcbddc->dbg_flag) { 2923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2925 } 2926 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2927 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2928 } 2929 } 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2936 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2937 { 2938 PetscErrorCode ierr; 2939 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2940 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2941 2942 PetscFunctionBegin; 2943 if (!sub_schurs->reuse_solver) { 2944 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2945 } 2946 if (!pcbddc->switch_static) { 2947 if (applytranspose && pcbddc->local_auxmat1) { 2948 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2949 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2950 } 2951 if (!sub_schurs->reuse_solver) { 2952 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2953 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 } else { 2955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2956 2957 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2958 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2959 } 2960 } else { 2961 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2963 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2964 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2965 if (applytranspose && pcbddc->local_auxmat1) { 2966 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2967 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2968 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2969 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2970 } 2971 } 2972 if (!sub_schurs->reuse_solver) { 2973 if (applytranspose) { 2974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2975 } else { 2976 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2977 } 2978 } else { 2979 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2980 2981 if (applytranspose) { 2982 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2983 } else { 2984 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2985 } 2986 } 2987 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2988 if (!pcbddc->switch_static) { 2989 if (!sub_schurs->reuse_solver) { 2990 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2991 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 } else { 2993 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2994 2995 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2996 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2997 } 2998 if (!applytranspose && pcbddc->local_auxmat1) { 2999 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3000 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3001 } 3002 } else { 3003 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 if (!applytranspose && pcbddc->local_auxmat1) { 3008 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3009 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3010 } 3011 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3012 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3020 #undef __FUNCT__ 3021 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3022 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3023 { 3024 PetscErrorCode ierr; 3025 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3026 PC_IS* pcis = (PC_IS*) (pc->data); 3027 const PetscScalar zero = 0.0; 3028 3029 PetscFunctionBegin; 3030 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3031 if (!pcbddc->benign_apply_coarse_only) { 3032 if (applytranspose) { 3033 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3034 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3035 } else { 3036 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3037 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3038 } 3039 } else { 3040 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3041 } 3042 3043 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3044 if (pcbddc->benign_n) { 3045 PetscScalar *array; 3046 PetscInt j; 3047 3048 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3049 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3050 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3051 } 3052 3053 /* start communications from local primal nodes to rhs of coarse solver */ 3054 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3055 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3057 3058 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3059 /* TODO remove null space when doing multilevel */ 3060 if (pcbddc->coarse_ksp) { 3061 Mat coarse_mat; 3062 Vec rhs,sol; 3063 MatNullSpace nullsp; 3064 3065 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3066 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3067 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3068 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3069 if (nullsp) { 3070 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3071 } 3072 if (applytranspose) { 3073 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3074 } else { 3075 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3076 } 3077 if (nullsp) { 3078 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3079 } 3080 } 3081 3082 /* Local solution on R nodes */ 3083 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3084 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3085 } 3086 /* communications from coarse sol to local primal nodes */ 3087 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3088 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3089 3090 /* Sum contributions from the two levels */ 3091 if (!pcbddc->benign_apply_coarse_only) { 3092 if (applytranspose) { 3093 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3094 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3095 } else { 3096 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3097 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3098 } 3099 /* store p0 */ 3100 if (pcbddc->benign_n) { 3101 PetscScalar *array; 3102 PetscInt j; 3103 3104 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3105 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3106 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3107 } 3108 } else { /* expand the coarse solution */ 3109 if (applytranspose) { 3110 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3111 } else { 3112 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3113 } 3114 } 3115 PetscFunctionReturn(0); 3116 } 3117 3118 #undef __FUNCT__ 3119 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3120 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3121 { 3122 PetscErrorCode ierr; 3123 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3124 PetscScalar *array; 3125 Vec from,to; 3126 3127 PetscFunctionBegin; 3128 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3129 from = pcbddc->coarse_vec; 3130 to = pcbddc->vec1_P; 3131 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3132 Vec tvec; 3133 3134 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3135 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3136 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3137 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3138 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3139 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3140 } 3141 } else { /* from local to global -> put data in coarse right hand side */ 3142 from = pcbddc->vec1_P; 3143 to = pcbddc->coarse_vec; 3144 } 3145 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3151 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3152 { 3153 PetscErrorCode ierr; 3154 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3155 PetscScalar *array; 3156 Vec from,to; 3157 3158 PetscFunctionBegin; 3159 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3160 from = pcbddc->coarse_vec; 3161 to = pcbddc->vec1_P; 3162 } else { /* from local to global -> put data in coarse right hand side */ 3163 from = pcbddc->vec1_P; 3164 to = pcbddc->coarse_vec; 3165 } 3166 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3167 if (smode == SCATTER_FORWARD) { 3168 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3169 Vec tvec; 3170 3171 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3172 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3173 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3174 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3175 } 3176 } else { 3177 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3178 ierr = VecResetArray(from);CHKERRQ(ierr); 3179 } 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 3184 /* uncomment for testing purposes */ 3185 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3188 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3189 { 3190 PetscErrorCode ierr; 3191 PC_IS* pcis = (PC_IS*)(pc->data); 3192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3193 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3194 /* one and zero */ 3195 PetscScalar one=1.0,zero=0.0; 3196 /* space to store constraints and their local indices */ 3197 PetscScalar *constraints_data; 3198 PetscInt *constraints_idxs,*constraints_idxs_B; 3199 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3200 PetscInt *constraints_n; 3201 /* iterators */ 3202 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3203 /* BLAS integers */ 3204 PetscBLASInt lwork,lierr; 3205 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3206 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3207 /* reuse */ 3208 PetscInt olocal_primal_size,olocal_primal_size_cc; 3209 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3210 /* change of basis */ 3211 PetscBool qr_needed; 3212 PetscBT change_basis,qr_needed_idx; 3213 /* auxiliary stuff */ 3214 PetscInt *nnz,*is_indices; 3215 PetscInt ncc; 3216 /* some quantities */ 3217 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3218 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3219 3220 PetscFunctionBegin; 3221 /* Destroy Mat objects computed previously */ 3222 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3223 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3224 /* save info on constraints from previous setup (if any) */ 3225 olocal_primal_size = pcbddc->local_primal_size; 3226 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3227 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3228 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3229 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3230 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3231 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3232 3233 /* print some info */ 3234 if (pcbddc->dbg_flag) { 3235 IS vertices; 3236 PetscInt nv,nedges,nfaces; 3237 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3238 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3239 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3240 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3246 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3247 } 3248 3249 if (!pcbddc->adaptive_selection) { 3250 IS ISForVertices,*ISForFaces,*ISForEdges; 3251 MatNullSpace nearnullsp; 3252 const Vec *nearnullvecs; 3253 Vec *localnearnullsp; 3254 PetscScalar *array; 3255 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3256 PetscBool nnsp_has_cnst; 3257 /* LAPACK working arrays for SVD or POD */ 3258 PetscBool skip_lapack,boolforchange; 3259 PetscScalar *work; 3260 PetscReal *singular_vals; 3261 #if defined(PETSC_USE_COMPLEX) 3262 PetscReal *rwork; 3263 #endif 3264 #if defined(PETSC_MISSING_LAPACK_GESVD) 3265 PetscScalar *temp_basis,*correlation_mat; 3266 #else 3267 PetscBLASInt dummy_int=1; 3268 PetscScalar dummy_scalar=1.; 3269 #endif 3270 3271 /* Get index sets for faces, edges and vertices from graph */ 3272 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3273 /* free unneeded index sets */ 3274 if (!pcbddc->use_vertices) { 3275 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3276 } 3277 if (!pcbddc->use_edges) { 3278 for (i=0;i<n_ISForEdges;i++) { 3279 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3280 } 3281 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3282 n_ISForEdges = 0; 3283 } 3284 if (!pcbddc->use_faces) { 3285 for (i=0;i<n_ISForFaces;i++) { 3286 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3287 } 3288 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3289 n_ISForFaces = 0; 3290 } 3291 3292 #if defined(PETSC_USE_DEBUG) 3293 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3294 Also use_change_of_basis should be consistent among processors */ 3295 if (pcbddc->NullSpace) { 3296 PetscBool tbool[2],gbool[2]; 3297 3298 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3299 pcbddc->use_change_of_basis = PETSC_TRUE; 3300 if (!ISForEdges) { 3301 pcbddc->use_change_on_faces = PETSC_TRUE; 3302 } 3303 } 3304 tbool[0] = pcbddc->use_change_of_basis; 3305 tbool[1] = pcbddc->use_change_on_faces; 3306 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3307 pcbddc->use_change_of_basis = gbool[0]; 3308 pcbddc->use_change_on_faces = gbool[1]; 3309 } 3310 #endif 3311 3312 /* check if near null space is attached to global mat */ 3313 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3314 if (nearnullsp) { 3315 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3316 /* remove any stored info */ 3317 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3318 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3319 /* store information for BDDC solver reuse */ 3320 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3321 pcbddc->onearnullspace = nearnullsp; 3322 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3323 for (i=0;i<nnsp_size;i++) { 3324 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3325 } 3326 } else { /* if near null space is not provided BDDC uses constants by default */ 3327 nnsp_size = 0; 3328 nnsp_has_cnst = PETSC_TRUE; 3329 } 3330 /* get max number of constraints on a single cc */ 3331 max_constraints = nnsp_size; 3332 if (nnsp_has_cnst) max_constraints++; 3333 3334 /* 3335 Evaluate maximum storage size needed by the procedure 3336 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3337 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3338 There can be multiple constraints per connected component 3339 */ 3340 n_vertices = 0; 3341 if (ISForVertices) { 3342 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3343 } 3344 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3345 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3346 3347 total_counts = n_ISForFaces+n_ISForEdges; 3348 total_counts *= max_constraints; 3349 total_counts += n_vertices; 3350 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3351 3352 total_counts = 0; 3353 max_size_of_constraint = 0; 3354 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3355 IS used_is; 3356 if (i<n_ISForEdges) { 3357 used_is = ISForEdges[i]; 3358 } else { 3359 used_is = ISForFaces[i-n_ISForEdges]; 3360 } 3361 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3362 total_counts += j; 3363 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3364 } 3365 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3366 3367 /* get local part of global near null space vectors */ 3368 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3369 for (k=0;k<nnsp_size;k++) { 3370 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3371 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3372 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 } 3374 3375 /* whether or not to skip lapack calls */ 3376 skip_lapack = PETSC_TRUE; 3377 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3378 3379 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3380 if (!skip_lapack) { 3381 PetscScalar temp_work; 3382 3383 #if defined(PETSC_MISSING_LAPACK_GESVD) 3384 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3385 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3386 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3387 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3388 #if defined(PETSC_USE_COMPLEX) 3389 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3390 #endif 3391 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3392 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3393 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3394 lwork = -1; 3395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3396 #if !defined(PETSC_USE_COMPLEX) 3397 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3398 #else 3399 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3400 #endif 3401 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3402 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3403 #else /* on missing GESVD */ 3404 /* SVD */ 3405 PetscInt max_n,min_n; 3406 max_n = max_size_of_constraint; 3407 min_n = max_constraints; 3408 if (max_size_of_constraint < max_constraints) { 3409 min_n = max_size_of_constraint; 3410 max_n = max_constraints; 3411 } 3412 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3413 #if defined(PETSC_USE_COMPLEX) 3414 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3415 #endif 3416 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3417 lwork = -1; 3418 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3419 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3420 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3421 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3422 #if !defined(PETSC_USE_COMPLEX) 3423 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3424 #else 3425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3426 #endif 3427 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3428 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3429 #endif /* on missing GESVD */ 3430 /* Allocate optimal workspace */ 3431 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3432 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3433 } 3434 /* Now we can loop on constraining sets */ 3435 total_counts = 0; 3436 constraints_idxs_ptr[0] = 0; 3437 constraints_data_ptr[0] = 0; 3438 /* vertices */ 3439 if (n_vertices) { 3440 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3441 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3442 for (i=0;i<n_vertices;i++) { 3443 constraints_n[total_counts] = 1; 3444 constraints_data[total_counts] = 1.0; 3445 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3446 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3447 total_counts++; 3448 } 3449 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3450 n_vertices = total_counts; 3451 } 3452 3453 /* edges and faces */ 3454 total_counts_cc = total_counts; 3455 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3456 IS used_is; 3457 PetscBool idxs_copied = PETSC_FALSE; 3458 3459 if (ncc<n_ISForEdges) { 3460 used_is = ISForEdges[ncc]; 3461 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3462 } else { 3463 used_is = ISForFaces[ncc-n_ISForEdges]; 3464 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3465 } 3466 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3467 3468 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3469 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3470 /* change of basis should not be performed on local periodic nodes */ 3471 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3472 if (nnsp_has_cnst) { 3473 PetscScalar quad_value; 3474 3475 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3476 idxs_copied = PETSC_TRUE; 3477 3478 if (!pcbddc->use_nnsp_true) { 3479 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3480 } else { 3481 quad_value = 1.0; 3482 } 3483 for (j=0;j<size_of_constraint;j++) { 3484 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3485 } 3486 temp_constraints++; 3487 total_counts++; 3488 } 3489 for (k=0;k<nnsp_size;k++) { 3490 PetscReal real_value; 3491 PetscScalar *ptr_to_data; 3492 3493 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3494 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3495 for (j=0;j<size_of_constraint;j++) { 3496 ptr_to_data[j] = array[is_indices[j]]; 3497 } 3498 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3499 /* check if array is null on the connected component */ 3500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3501 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3502 if (real_value > 0.0) { /* keep indices and values */ 3503 temp_constraints++; 3504 total_counts++; 3505 if (!idxs_copied) { 3506 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3507 idxs_copied = PETSC_TRUE; 3508 } 3509 } 3510 } 3511 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3512 valid_constraints = temp_constraints; 3513 if (!pcbddc->use_nnsp_true && temp_constraints) { 3514 if (temp_constraints == 1) { /* just normalize the constraint */ 3515 PetscScalar norm,*ptr_to_data; 3516 3517 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3519 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3520 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3521 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3522 } else { /* perform SVD */ 3523 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3524 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3525 3526 #if defined(PETSC_MISSING_LAPACK_GESVD) 3527 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3528 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3529 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3530 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3531 from that computed using LAPACKgesvd 3532 -> This is due to a different computation of eigenvectors in LAPACKheev 3533 -> The quality of the POD-computed basis will be the same */ 3534 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3535 /* Store upper triangular part of correlation matrix */ 3536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3538 for (j=0;j<temp_constraints;j++) { 3539 for (k=0;k<j+1;k++) { 3540 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3541 } 3542 } 3543 /* compute eigenvalues and eigenvectors of correlation matrix */ 3544 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3545 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3546 #if !defined(PETSC_USE_COMPLEX) 3547 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3548 #else 3549 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3550 #endif 3551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3552 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3553 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3554 j = 0; 3555 while (j < temp_constraints && singular_vals[j] < tol) j++; 3556 total_counts = total_counts-j; 3557 valid_constraints = temp_constraints-j; 3558 /* scale and copy POD basis into used quadrature memory */ 3559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3560 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3561 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3563 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3565 if (j<temp_constraints) { 3566 PetscInt ii; 3567 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3569 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3570 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3571 for (k=0;k<temp_constraints-j;k++) { 3572 for (ii=0;ii<size_of_constraint;ii++) { 3573 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3574 } 3575 } 3576 } 3577 #else /* on missing GESVD */ 3578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3579 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3581 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3582 #if !defined(PETSC_USE_COMPLEX) 3583 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3584 #else 3585 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3586 #endif 3587 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3589 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3590 k = temp_constraints; 3591 if (k > size_of_constraint) k = size_of_constraint; 3592 j = 0; 3593 while (j < k && singular_vals[k-j-1] < tol) j++; 3594 valid_constraints = k-j; 3595 total_counts = total_counts-temp_constraints+valid_constraints; 3596 #endif /* on missing GESVD */ 3597 } 3598 } 3599 /* update pointers information */ 3600 if (valid_constraints) { 3601 constraints_n[total_counts_cc] = valid_constraints; 3602 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3603 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3604 /* set change_of_basis flag */ 3605 if (boolforchange) { 3606 PetscBTSet(change_basis,total_counts_cc); 3607 } 3608 total_counts_cc++; 3609 } 3610 } 3611 /* free workspace */ 3612 if (!skip_lapack) { 3613 ierr = PetscFree(work);CHKERRQ(ierr); 3614 #if defined(PETSC_USE_COMPLEX) 3615 ierr = PetscFree(rwork);CHKERRQ(ierr); 3616 #endif 3617 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3618 #if defined(PETSC_MISSING_LAPACK_GESVD) 3619 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3620 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3621 #endif 3622 } 3623 for (k=0;k<nnsp_size;k++) { 3624 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3625 } 3626 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3627 /* free index sets of faces, edges and vertices */ 3628 for (i=0;i<n_ISForFaces;i++) { 3629 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3630 } 3631 if (n_ISForFaces) { 3632 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3633 } 3634 for (i=0;i<n_ISForEdges;i++) { 3635 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3636 } 3637 if (n_ISForEdges) { 3638 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3639 } 3640 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3641 } else { 3642 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3643 3644 total_counts = 0; 3645 n_vertices = 0; 3646 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3647 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3648 } 3649 max_constraints = 0; 3650 total_counts_cc = 0; 3651 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3652 total_counts += pcbddc->adaptive_constraints_n[i]; 3653 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3654 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3655 } 3656 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3657 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3658 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3659 constraints_data = pcbddc->adaptive_constraints_data; 3660 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3661 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3662 total_counts_cc = 0; 3663 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3664 if (pcbddc->adaptive_constraints_n[i]) { 3665 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3666 } 3667 } 3668 #if 0 3669 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3670 for (i=0;i<total_counts_cc;i++) { 3671 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3672 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3673 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3674 printf(" %d",constraints_idxs[j]); 3675 } 3676 printf("\n"); 3677 printf("number of cc: %d\n",constraints_n[i]); 3678 } 3679 for (i=0;i<n_vertices;i++) { 3680 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3681 } 3682 for (i=0;i<sub_schurs->n_subs;i++) { 3683 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3684 } 3685 #endif 3686 3687 max_size_of_constraint = 0; 3688 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3689 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3690 /* Change of basis */ 3691 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3692 if (pcbddc->use_change_of_basis) { 3693 for (i=0;i<sub_schurs->n_subs;i++) { 3694 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3695 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3696 } 3697 } 3698 } 3699 } 3700 pcbddc->local_primal_size = total_counts; 3701 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3702 3703 /* map constraints_idxs in boundary numbering */ 3704 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3705 if (i != constraints_idxs_ptr[total_counts_cc]) { 3706 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3707 } 3708 3709 /* Create constraint matrix */ 3710 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3711 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3712 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3713 3714 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3715 /* determine if a QR strategy is needed for change of basis */ 3716 qr_needed = PETSC_FALSE; 3717 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3718 total_primal_vertices=0; 3719 pcbddc->local_primal_size_cc = 0; 3720 for (i=0;i<total_counts_cc;i++) { 3721 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3722 if (size_of_constraint == 1) { 3723 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3724 pcbddc->local_primal_size_cc += 1; 3725 } else if (PetscBTLookup(change_basis,i)) { 3726 for (k=0;k<constraints_n[i];k++) { 3727 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3728 } 3729 pcbddc->local_primal_size_cc += constraints_n[i]; 3730 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3731 PetscBTSet(qr_needed_idx,i); 3732 qr_needed = PETSC_TRUE; 3733 } 3734 } else { 3735 pcbddc->local_primal_size_cc += 1; 3736 } 3737 } 3738 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3739 pcbddc->n_vertices = total_primal_vertices; 3740 /* permute indices in order to have a sorted set of vertices */ 3741 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3742 3743 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3744 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3745 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3746 3747 /* nonzero structure of constraint matrix */ 3748 /* and get reference dof for local constraints */ 3749 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3750 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3751 3752 j = total_primal_vertices; 3753 total_counts = total_primal_vertices; 3754 cum = total_primal_vertices; 3755 for (i=n_vertices;i<total_counts_cc;i++) { 3756 if (!PetscBTLookup(change_basis,i)) { 3757 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3758 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3759 cum++; 3760 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3761 for (k=0;k<constraints_n[i];k++) { 3762 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3763 nnz[j+k] = size_of_constraint; 3764 } 3765 j += constraints_n[i]; 3766 } 3767 } 3768 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3769 ierr = PetscFree(nnz);CHKERRQ(ierr); 3770 3771 /* set values in constraint matrix */ 3772 for (i=0;i<total_primal_vertices;i++) { 3773 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3774 } 3775 total_counts = total_primal_vertices; 3776 for (i=n_vertices;i<total_counts_cc;i++) { 3777 if (!PetscBTLookup(change_basis,i)) { 3778 PetscInt *cols; 3779 3780 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3781 cols = constraints_idxs+constraints_idxs_ptr[i]; 3782 for (k=0;k<constraints_n[i];k++) { 3783 PetscInt row = total_counts+k; 3784 PetscScalar *vals; 3785 3786 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3787 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3788 } 3789 total_counts += constraints_n[i]; 3790 } 3791 } 3792 /* assembling */ 3793 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 3796 /* 3797 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3798 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3799 */ 3800 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3801 if (pcbddc->use_change_of_basis) { 3802 /* dual and primal dofs on a single cc */ 3803 PetscInt dual_dofs,primal_dofs; 3804 /* working stuff for GEQRF */ 3805 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3806 PetscBLASInt lqr_work; 3807 /* working stuff for UNGQR */ 3808 PetscScalar *gqr_work,lgqr_work_t; 3809 PetscBLASInt lgqr_work; 3810 /* working stuff for TRTRS */ 3811 PetscScalar *trs_rhs; 3812 PetscBLASInt Blas_NRHS; 3813 /* pointers for values insertion into change of basis matrix */ 3814 PetscInt *start_rows,*start_cols; 3815 PetscScalar *start_vals; 3816 /* working stuff for values insertion */ 3817 PetscBT is_primal; 3818 PetscInt *aux_primal_numbering_B; 3819 /* matrix sizes */ 3820 PetscInt global_size,local_size; 3821 /* temporary change of basis */ 3822 Mat localChangeOfBasisMatrix; 3823 /* extra space for debugging */ 3824 PetscScalar *dbg_work; 3825 3826 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3827 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3828 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3829 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3830 /* nonzeros for local mat */ 3831 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3832 for (i=0;i<pcis->n;i++) nnz[i]=1; 3833 for (i=n_vertices;i<total_counts_cc;i++) { 3834 if (PetscBTLookup(change_basis,i)) { 3835 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3836 if (PetscBTLookup(qr_needed_idx,i)) { 3837 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3838 } else { 3839 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3840 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3841 } 3842 } 3843 } 3844 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3845 ierr = PetscFree(nnz);CHKERRQ(ierr); 3846 /* Set initial identity in the matrix */ 3847 for (i=0;i<pcis->n;i++) { 3848 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3849 } 3850 3851 if (pcbddc->dbg_flag) { 3852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3853 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3854 } 3855 3856 3857 /* Now we loop on the constraints which need a change of basis */ 3858 /* 3859 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3860 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3861 3862 Basic blocks of change of basis matrix T computed by 3863 3864 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3865 3866 | 1 0 ... 0 s_1/S | 3867 | 0 1 ... 0 s_2/S | 3868 | ... | 3869 | 0 ... 1 s_{n-1}/S | 3870 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3871 3872 with S = \sum_{i=1}^n s_i^2 3873 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3874 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3875 3876 - QR decomposition of constraints otherwise 3877 */ 3878 if (qr_needed) { 3879 /* space to store Q */ 3880 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3881 /* first we issue queries for optimal work */ 3882 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3883 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3885 lqr_work = -1; 3886 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3887 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3888 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3889 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3890 lgqr_work = -1; 3891 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3893 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3894 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3895 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3896 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3898 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3899 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3900 /* array to store scaling factors for reflectors */ 3901 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3902 /* array to store rhs and solution of triangular solver */ 3903 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3904 /* allocating workspace for check */ 3905 if (pcbddc->dbg_flag) { 3906 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3907 } 3908 } 3909 /* array to store whether a node is primal or not */ 3910 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3911 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3912 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3913 if (i != total_primal_vertices) { 3914 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3915 } 3916 for (i=0;i<total_primal_vertices;i++) { 3917 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3918 } 3919 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3920 3921 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3922 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3923 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3924 if (PetscBTLookup(change_basis,total_counts)) { 3925 /* get constraint info */ 3926 primal_dofs = constraints_n[total_counts]; 3927 dual_dofs = size_of_constraint-primal_dofs; 3928 3929 if (pcbddc->dbg_flag) { 3930 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3931 } 3932 3933 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3934 3935 /* copy quadrature constraints for change of basis check */ 3936 if (pcbddc->dbg_flag) { 3937 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3938 } 3939 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3940 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3941 3942 /* compute QR decomposition of constraints */ 3943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3944 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3947 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3949 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3950 3951 /* explictly compute R^-T */ 3952 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3953 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3954 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3955 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3958 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3959 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3960 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3961 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3962 3963 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3966 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3969 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3971 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3972 3973 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3974 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3975 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3982 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3983 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3984 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3985 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3986 3987 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3988 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3989 /* insert cols for primal dofs */ 3990 for (j=0;j<primal_dofs;j++) { 3991 start_vals = &qr_basis[j*size_of_constraint]; 3992 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3993 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3994 } 3995 /* insert cols for dual dofs */ 3996 for (j=0,k=0;j<dual_dofs;k++) { 3997 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3998 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3999 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4000 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4001 j++; 4002 } 4003 } 4004 4005 /* check change of basis */ 4006 if (pcbddc->dbg_flag) { 4007 PetscInt ii,jj; 4008 PetscBool valid_qr=PETSC_TRUE; 4009 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4014 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4016 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4018 for (jj=0;jj<size_of_constraint;jj++) { 4019 for (ii=0;ii<primal_dofs;ii++) { 4020 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4021 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4022 } 4023 } 4024 if (!valid_qr) { 4025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4026 for (jj=0;jj<size_of_constraint;jj++) { 4027 for (ii=0;ii<primal_dofs;ii++) { 4028 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4029 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4030 } 4031 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4032 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4033 } 4034 } 4035 } 4036 } else { 4037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4038 } 4039 } 4040 } else { /* simple transformation block */ 4041 PetscInt row,col; 4042 PetscScalar val,norm; 4043 4044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4045 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4046 for (j=0;j<size_of_constraint;j++) { 4047 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4048 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4049 if (!PetscBTLookup(is_primal,row_B)) { 4050 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4051 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4052 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4053 } else { 4054 for (k=0;k<size_of_constraint;k++) { 4055 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4056 if (row != col) { 4057 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4058 } else { 4059 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4060 } 4061 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4062 } 4063 } 4064 } 4065 if (pcbddc->dbg_flag) { 4066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4067 } 4068 } 4069 } else { 4070 if (pcbddc->dbg_flag) { 4071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4072 } 4073 } 4074 } 4075 4076 /* free workspace */ 4077 if (qr_needed) { 4078 if (pcbddc->dbg_flag) { 4079 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4080 } 4081 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4082 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4083 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4084 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4085 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4086 } 4087 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4088 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4089 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 4091 /* assembling of global change of variable */ 4092 if (!pcbddc->fake_change) { 4093 Mat tmat; 4094 PetscInt bs; 4095 4096 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4097 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4098 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4099 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4100 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4101 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4102 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4103 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4104 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4105 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4106 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4107 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4108 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4109 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4110 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4111 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4112 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4113 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4114 4115 /* check */ 4116 if (pcbddc->dbg_flag) { 4117 PetscReal error; 4118 Vec x,x_change; 4119 4120 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4121 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4122 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4123 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4124 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4125 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4126 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4127 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4128 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4129 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4130 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4131 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4134 ierr = VecDestroy(&x);CHKERRQ(ierr); 4135 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4136 } 4137 } 4138 4139 /* adapt sub_schurs computed (if any) */ 4140 if (pcbddc->use_deluxe_scaling) { 4141 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4142 4143 if (pcbddc->fake_change && !pcbddc->deluxe_zerorows) { 4144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");CHKERRQ(ierr); 4145 } 4146 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4147 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4148 } 4149 if (sub_schurs->S_Ej_all) { 4150 Mat S_new,tmat; 4151 IS is_all_N,is_V_Sall = NULL; 4152 4153 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4154 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4155 if (pcbddc->deluxe_zerorows) { 4156 ISLocalToGlobalMapping NtoSall; 4157 IS is_V; 4158 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4159 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4160 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4161 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4162 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4163 } 4164 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4165 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4166 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4167 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4168 if (pcbddc->deluxe_zerorows) { 4169 const PetscScalar *array; 4170 const PetscInt *idxs_V,*idxs_all; 4171 PetscInt i,n_V; 4172 4173 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4174 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4175 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4176 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4177 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4178 for (i=0;i<n_V;i++) { 4179 PetscScalar val; 4180 PetscInt idx; 4181 4182 idx = idxs_V[i]; 4183 val = array[idxs_all[idxs_V[i]]]; 4184 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4185 } 4186 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4187 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4188 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4189 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4190 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4191 } 4192 sub_schurs->S_Ej_all = S_new; 4193 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4194 if (sub_schurs->sum_S_Ej_all) { 4195 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4196 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4197 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4198 if (pcbddc->deluxe_zerorows) { 4199 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4200 } 4201 sub_schurs->sum_S_Ej_all = S_new; 4202 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4203 } 4204 if (sub_schurs->sum_S_Ej_inv_all && pcbddc->fake_change && pcbddc->deluxe_zerorows) { 4205 PetscScalar infty = PETSC_MAX_REAL; 4206 ierr = MatPtAP(sub_schurs->sum_S_Ej_inv_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4207 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 4208 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4209 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4210 sub_schurs->sum_S_Ej_inv_all = S_new; 4211 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4212 ierr = MatPtAP(sub_schurs->sum_S_Ej_tilda_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4213 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 4214 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4215 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,infty,NULL,NULL);CHKERRQ(ierr); 4216 sub_schurs->sum_S_Ej_tilda_all = S_new; 4217 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4218 } 4219 4220 if (!pcbddc->fake_change) { 4221 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4222 } else { 4223 if (!sub_schurs->change_primal) { 4224 sub_schurs->change_primal = is_V_Sall; 4225 } else { 4226 #if defined(PETSC_USE_DEBUG) 4227 IS ist; 4228 PetscInt n; 4229 4230 ierr = ISDifference(sub_schurs->change_primal,is_V_Sall,&ist);CHKERRQ(ierr); 4231 ierr = ISGetLocalSize(ist,&n);CHKERRQ(ierr); 4232 if (n) { 4233 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4234 } 4235 ierr = ISDestroy(&ist);CHKERRQ(ierr); 4236 #endif 4237 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4238 } 4239 if (!sub_schurs->change) { 4240 ierr = KSPCreate(PETSC_COMM_SELF,&sub_schurs->change);CHKERRQ(ierr); 4241 ierr = KSPSetOperators(sub_schurs->change,tmat,tmat);CHKERRQ(ierr); 4242 #if defined(PETSC_USE_DEBUG) 4243 } else { 4244 Mat At; 4245 PetscReal norm; 4246 4247 ierr = KSPGetOperators(sub_schurs->change,&At,NULL);CHKERRQ(ierr); 4248 ierr = MatAXPY(tmat,-1.,At,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4249 ierr = MatNorm(tmat,NORM_INFINITY,&norm);CHKERRQ(ierr); 4250 if (norm > PETSC_SMALL) { 4251 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4252 } 4253 #endif 4254 } 4255 } 4256 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4257 } 4258 } 4259 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4260 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4261 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4262 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4263 } 4264 4265 /* set up change of basis context */ 4266 if (pcbddc->ChangeOfBasisMatrix) { 4267 PCBDDCChange_ctx change_ctx; 4268 4269 if (!pcbddc->new_global_mat) { 4270 PetscInt global_size,local_size; 4271 4272 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4273 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4274 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4275 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4276 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4277 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4278 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4279 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4280 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4281 } else { 4282 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4283 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4284 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4285 } 4286 if (!pcbddc->user_ChangeOfBasisMatrix) { 4287 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4288 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4289 } else { 4290 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4291 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4292 } 4293 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4294 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4295 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4296 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4299 } 4300 4301 /* add pressure dofs to set of primal nodes for numbering purposes */ 4302 for (i=0;i<pcbddc->benign_n;i++) { 4303 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4304 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4305 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4306 pcbddc->local_primal_size_cc++; 4307 pcbddc->local_primal_size++; 4308 } 4309 4310 /* check if a new primal space has been introduced (also take into account benign trick) */ 4311 pcbddc->new_primal_space_local = PETSC_TRUE; 4312 if (olocal_primal_size == pcbddc->local_primal_size) { 4313 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4314 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4315 if (!pcbddc->new_primal_space_local) { 4316 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4317 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4318 } 4319 } 4320 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4321 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4322 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4323 4324 /* flush dbg viewer */ 4325 if (pcbddc->dbg_flag) { 4326 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4327 } 4328 4329 /* free workspace */ 4330 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4331 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4332 if (!pcbddc->adaptive_selection) { 4333 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4334 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4335 } else { 4336 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4337 pcbddc->adaptive_constraints_idxs_ptr, 4338 pcbddc->adaptive_constraints_data_ptr, 4339 pcbddc->adaptive_constraints_idxs, 4340 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4341 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4342 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4343 } 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4349 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4350 { 4351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4352 PC_IS *pcis = (PC_IS*)pc->data; 4353 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4354 PetscInt ierr,i,vertex_size,N; 4355 PetscViewer viewer=pcbddc->dbg_viewer; 4356 4357 PetscFunctionBegin; 4358 /* Reset previously computed graph */ 4359 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4360 /* Init local Graph struct */ 4361 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4362 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4363 4364 /* Check validity of the csr graph passed in by the user */ 4365 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4366 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4367 } 4368 4369 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4370 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4371 PetscInt *xadj,*adjncy; 4372 PetscInt nvtxs; 4373 PetscBool flg_row=PETSC_FALSE; 4374 4375 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4376 if (flg_row) { 4377 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4378 pcbddc->computed_rowadj = PETSC_TRUE; 4379 } 4380 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4381 } 4382 if (pcbddc->dbg_flag) { 4383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4384 } 4385 4386 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4387 vertex_size = 1; 4388 if (pcbddc->user_provided_isfordofs) { 4389 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4390 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4391 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4392 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4393 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4394 } 4395 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4396 pcbddc->n_ISForDofs = 0; 4397 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4398 } 4399 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4400 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4401 } else { 4402 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4403 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4404 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4405 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4406 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4407 } 4408 } 4409 } 4410 4411 /* Setup of Graph */ 4412 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4413 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4414 } 4415 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4416 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4417 } 4418 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4419 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4420 } 4421 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4422 4423 /* attach info on disconnected subdomains if present */ 4424 if (pcbddc->n_local_subs) { 4425 PetscInt *local_subs; 4426 4427 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4428 for (i=0;i<pcbddc->n_local_subs;i++) { 4429 const PetscInt *idxs; 4430 PetscInt nl,j; 4431 4432 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4433 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4434 for (j=0;j<nl;j++) { 4435 local_subs[idxs[j]] = i; 4436 } 4437 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4438 } 4439 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4440 pcbddc->mat_graph->local_subs = local_subs; 4441 } 4442 4443 /* Graph's connected components analysis */ 4444 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4445 4446 /* print some info to stdout */ 4447 if (pcbddc->dbg_flag) { 4448 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4449 } 4450 4451 /* mark topography has done */ 4452 pcbddc->recompute_topography = PETSC_FALSE; 4453 PetscFunctionReturn(0); 4454 } 4455 4456 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4457 #undef __FUNCT__ 4458 #define __FUNCT__ "PCBDDCSubsetNumbering" 4459 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4460 { 4461 PetscSF sf; 4462 PetscLayout map; 4463 const PetscInt *idxs; 4464 PetscInt *leaf_data,*root_data,*gidxs; 4465 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4466 PetscInt n_n,nlocals,start,first_index; 4467 PetscMPIInt commsize; 4468 PetscBool first_found; 4469 PetscErrorCode ierr; 4470 4471 PetscFunctionBegin; 4472 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4473 if (subset_mult) { 4474 PetscCheckSameComm(subset,1,subset_mult,2); 4475 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4476 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4477 } 4478 /* create workspace layout for computing global indices of subset */ 4479 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4480 lbounds[0] = lbounds[1] = 0; 4481 for (i=0;i<n;i++) { 4482 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4483 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4484 } 4485 lbounds[0] = -lbounds[0]; 4486 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4487 gbounds[0] = -gbounds[0]; 4488 N = gbounds[1] - gbounds[0] + 1; 4489 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4490 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4491 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4492 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4493 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4494 4495 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4496 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4497 if (subset_mult) { 4498 const PetscInt* idxs_mult; 4499 4500 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4501 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4502 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4503 } else { 4504 for (i=0;i<n;i++) leaf_data[i] = 1; 4505 } 4506 /* local size of new subset */ 4507 n_n = 0; 4508 for (i=0;i<n;i++) n_n += leaf_data[i]; 4509 4510 /* global indexes in layout */ 4511 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4512 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4513 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4514 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4515 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4516 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4517 4518 /* reduce from leaves to roots */ 4519 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4520 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4521 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4522 4523 /* count indexes in local part of layout */ 4524 nlocals = 0; 4525 first_index = -1; 4526 first_found = PETSC_FALSE; 4527 for (i=0;i<Nl;i++) { 4528 if (!first_found && root_data[i]) { 4529 first_found = PETSC_TRUE; 4530 first_index = i; 4531 } 4532 nlocals += root_data[i]; 4533 } 4534 4535 /* cumulative of number of indexes and size of subset without holes */ 4536 #if defined(PETSC_HAVE_MPI_EXSCAN) 4537 start = 0; 4538 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4539 #else 4540 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4541 start = start-nlocals; 4542 #endif 4543 4544 if (N_n) { /* compute total size of new subset if requested */ 4545 *N_n = start + nlocals; 4546 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4547 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4548 } 4549 4550 /* adapt root data with cumulative */ 4551 if (first_found) { 4552 PetscInt old_index; 4553 4554 root_data[first_index] += start; 4555 old_index = first_index; 4556 for (i=first_index+1;i<Nl;i++) { 4557 if (root_data[i]) { 4558 root_data[i] += root_data[old_index]; 4559 old_index = i; 4560 } 4561 } 4562 } 4563 4564 /* from roots to leaves */ 4565 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4566 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4568 4569 /* create new IS with global indexes without holes */ 4570 if (subset_mult) { 4571 const PetscInt* idxs_mult; 4572 PetscInt cum; 4573 4574 cum = 0; 4575 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4576 for (i=0;i<n;i++) { 4577 PetscInt j; 4578 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4579 } 4580 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4581 } else { 4582 for (i=0;i<n;i++) { 4583 gidxs[i] = leaf_data[i]-1; 4584 } 4585 } 4586 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4587 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4588 PetscFunctionReturn(0); 4589 } 4590 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4593 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4594 { 4595 PetscInt i,j; 4596 PetscScalar *alphas; 4597 PetscErrorCode ierr; 4598 4599 PetscFunctionBegin; 4600 /* this implements stabilized Gram-Schmidt */ 4601 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4602 for (i=0;i<n;i++) { 4603 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4604 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4605 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4606 } 4607 ierr = PetscFree(alphas);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "MatISGetSubassemblingPattern" 4613 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4614 { 4615 IS ranks_send_to; 4616 PetscInt n_neighs,*neighs,*n_shared,**shared; 4617 PetscMPIInt size,rank,color; 4618 PetscInt *xadj,*adjncy; 4619 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4620 PetscInt i,local_size,threshold=0; 4621 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4622 PetscSubcomm subcomm; 4623 PetscErrorCode ierr; 4624 4625 PetscFunctionBegin; 4626 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4627 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4628 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4629 4630 /* Get info on mapping */ 4631 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4632 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4633 4634 /* build local CSR graph of subdomains' connectivity */ 4635 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4636 xadj[0] = 0; 4637 xadj[1] = PetscMax(n_neighs-1,0); 4638 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4639 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4640 4641 if (threshold) { 4642 PetscInt xadj_count = 0; 4643 for (i=1;i<n_neighs;i++) { 4644 if (n_shared[i] > threshold) { 4645 adjncy[xadj_count] = neighs[i]; 4646 adjncy_wgt[xadj_count] = n_shared[i]; 4647 xadj_count++; 4648 } 4649 } 4650 xadj[1] = xadj_count; 4651 } else { 4652 if (xadj[1]) { 4653 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4654 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4655 } 4656 } 4657 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4658 if (use_square) { 4659 for (i=0;i<xadj[1];i++) { 4660 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4661 } 4662 } 4663 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4664 4665 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4666 4667 /* 4668 Restrict work on active processes only. 4669 */ 4670 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4671 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4672 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4673 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4674 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4675 if (color) { 4676 ierr = PetscFree(xadj);CHKERRQ(ierr); 4677 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4678 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4679 } else { 4680 Mat subdomain_adj; 4681 IS new_ranks,new_ranks_contig; 4682 MatPartitioning partitioner; 4683 PetscInt prank,rstart=0,rend=0; 4684 PetscInt *is_indices,*oldranks; 4685 PetscBool aggregate; 4686 4687 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4688 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4689 prank = rank; 4690 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4691 /* 4692 for (i=0;i<size;i++) { 4693 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4694 } 4695 */ 4696 for (i=0;i<xadj[1];i++) { 4697 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4698 } 4699 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4700 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4701 if (aggregate) { 4702 PetscInt lrows,row,ncols,*cols; 4703 PetscMPIInt nrank; 4704 PetscScalar *vals; 4705 4706 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4707 lrows = 0; 4708 if (nrank<redprocs) { 4709 lrows = size/redprocs; 4710 if (nrank<size%redprocs) lrows++; 4711 } 4712 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4713 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4714 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4715 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4716 row = nrank; 4717 ncols = xadj[1]-xadj[0]; 4718 cols = adjncy; 4719 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4720 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4721 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4722 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4723 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4724 ierr = PetscFree(xadj);CHKERRQ(ierr); 4725 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4726 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4727 ierr = PetscFree(vals);CHKERRQ(ierr); 4728 } else { 4729 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4730 } 4731 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4732 4733 /* Partition */ 4734 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4735 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4736 if (use_vwgt) { 4737 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4738 v_wgt[0] = local_size; 4739 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4740 } 4741 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4742 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4743 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4744 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4745 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4746 4747 /* renumber new_ranks to avoid "holes" in new set of processors */ 4748 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4749 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4750 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4751 if (!redprocs) { 4752 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4753 } else { 4754 PetscInt idxs[1]; 4755 PetscMPIInt tag; 4756 MPI_Request *reqs; 4757 4758 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4759 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4760 for (i=rstart;i<rend;i++) { 4761 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4762 } 4763 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4764 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4765 ierr = PetscFree(reqs);CHKERRQ(ierr); 4766 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4767 } 4768 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4769 /* clean up */ 4770 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4771 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4772 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4773 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4774 } 4775 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4776 4777 /* assemble parallel IS for sends */ 4778 i = 1; 4779 if (color) i=0; 4780 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4781 /* get back IS */ 4782 *is_sends = ranks_send_to; 4783 PetscFunctionReturn(0); 4784 } 4785 4786 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4787 4788 #undef __FUNCT__ 4789 #define __FUNCT__ "MatISSubassemble" 4790 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4791 { 4792 Mat local_mat; 4793 IS is_sends_internal; 4794 PetscInt rows,cols,new_local_rows; 4795 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4796 PetscBool ismatis,isdense,newisdense,destroy_mat; 4797 ISLocalToGlobalMapping l2gmap; 4798 PetscInt* l2gmap_indices; 4799 const PetscInt* is_indices; 4800 MatType new_local_type; 4801 /* buffers */ 4802 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4803 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4804 PetscInt *recv_buffer_idxs_local; 4805 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4806 /* MPI */ 4807 MPI_Comm comm,comm_n; 4808 PetscSubcomm subcomm; 4809 PetscMPIInt n_sends,n_recvs,commsize; 4810 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4811 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4812 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4813 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4814 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4815 PetscErrorCode ierr; 4816 4817 PetscFunctionBegin; 4818 /* TODO: add missing checks */ 4819 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4820 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4821 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4822 PetscValidLogicalCollectiveInt(mat,nis,7); 4823 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4824 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4825 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4826 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4827 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4828 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4829 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4830 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4831 PetscInt mrows,mcols,mnrows,mncols; 4832 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4833 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4834 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4835 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4836 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4837 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4838 } 4839 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4840 PetscValidLogicalCollectiveInt(mat,bs,0); 4841 /* prepare IS for sending if not provided */ 4842 if (!is_sends) { 4843 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4844 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4845 } else { 4846 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4847 is_sends_internal = is_sends; 4848 } 4849 4850 /* get comm */ 4851 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4852 4853 /* compute number of sends */ 4854 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4855 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4856 4857 /* compute number of receives */ 4858 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4859 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4860 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4861 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4862 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4863 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4864 ierr = PetscFree(iflags);CHKERRQ(ierr); 4865 4866 /* restrict comm if requested */ 4867 subcomm = 0; 4868 destroy_mat = PETSC_FALSE; 4869 if (restrict_comm) { 4870 PetscMPIInt color,subcommsize; 4871 4872 color = 0; 4873 if (restrict_full) { 4874 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4875 } else { 4876 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4877 } 4878 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4879 subcommsize = commsize - subcommsize; 4880 /* check if reuse has been requested */ 4881 if (reuse == MAT_REUSE_MATRIX) { 4882 if (*mat_n) { 4883 PetscMPIInt subcommsize2; 4884 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4885 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4886 comm_n = PetscObjectComm((PetscObject)*mat_n); 4887 } else { 4888 comm_n = PETSC_COMM_SELF; 4889 } 4890 } else { /* MAT_INITIAL_MATRIX */ 4891 PetscMPIInt rank; 4892 4893 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4894 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4895 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4896 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4897 comm_n = PetscSubcommChild(subcomm); 4898 } 4899 /* flag to destroy *mat_n if not significative */ 4900 if (color) destroy_mat = PETSC_TRUE; 4901 } else { 4902 comm_n = comm; 4903 } 4904 4905 /* prepare send/receive buffers */ 4906 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4907 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4908 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4909 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4910 if (nis) { 4911 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4912 } 4913 4914 /* Get data from local matrices */ 4915 if (!isdense) { 4916 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4917 /* TODO: See below some guidelines on how to prepare the local buffers */ 4918 /* 4919 send_buffer_vals should contain the raw values of the local matrix 4920 send_buffer_idxs should contain: 4921 - MatType_PRIVATE type 4922 - PetscInt size_of_l2gmap 4923 - PetscInt global_row_indices[size_of_l2gmap] 4924 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4925 */ 4926 } else { 4927 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4928 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4929 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4930 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4931 send_buffer_idxs[1] = i; 4932 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4933 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4934 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4935 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4936 for (i=0;i<n_sends;i++) { 4937 ilengths_vals[is_indices[i]] = len*len; 4938 ilengths_idxs[is_indices[i]] = len+2; 4939 } 4940 } 4941 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4942 /* additional is (if any) */ 4943 if (nis) { 4944 PetscMPIInt psum; 4945 PetscInt j; 4946 for (j=0,psum=0;j<nis;j++) { 4947 PetscInt plen; 4948 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4949 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4950 psum += len+1; /* indices + lenght */ 4951 } 4952 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4953 for (j=0,psum=0;j<nis;j++) { 4954 PetscInt plen; 4955 const PetscInt *is_array_idxs; 4956 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4957 send_buffer_idxs_is[psum] = plen; 4958 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4959 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4960 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4961 psum += plen+1; /* indices + lenght */ 4962 } 4963 for (i=0;i<n_sends;i++) { 4964 ilengths_idxs_is[is_indices[i]] = psum; 4965 } 4966 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4967 } 4968 4969 buf_size_idxs = 0; 4970 buf_size_vals = 0; 4971 buf_size_idxs_is = 0; 4972 for (i=0;i<n_recvs;i++) { 4973 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4974 buf_size_vals += (PetscInt)olengths_vals[i]; 4975 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4976 } 4977 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4978 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4979 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4980 4981 /* get new tags for clean communications */ 4982 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4983 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4984 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4985 4986 /* allocate for requests */ 4987 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4988 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4989 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4990 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4991 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4992 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4993 4994 /* communications */ 4995 ptr_idxs = recv_buffer_idxs; 4996 ptr_vals = recv_buffer_vals; 4997 ptr_idxs_is = recv_buffer_idxs_is; 4998 for (i=0;i<n_recvs;i++) { 4999 source_dest = onodes[i]; 5000 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5001 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5002 ptr_idxs += olengths_idxs[i]; 5003 ptr_vals += olengths_vals[i]; 5004 if (nis) { 5005 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5006 ptr_idxs_is += olengths_idxs_is[i]; 5007 } 5008 } 5009 for (i=0;i<n_sends;i++) { 5010 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5011 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5012 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5013 if (nis) { 5014 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5015 } 5016 } 5017 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5018 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5019 5020 /* assemble new l2g map */ 5021 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5022 ptr_idxs = recv_buffer_idxs; 5023 new_local_rows = 0; 5024 for (i=0;i<n_recvs;i++) { 5025 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5026 ptr_idxs += olengths_idxs[i]; 5027 } 5028 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5029 ptr_idxs = recv_buffer_idxs; 5030 new_local_rows = 0; 5031 for (i=0;i<n_recvs;i++) { 5032 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5033 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5034 ptr_idxs += olengths_idxs[i]; 5035 } 5036 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5037 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5038 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5039 5040 /* infer new local matrix type from received local matrices type */ 5041 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5042 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5043 if (n_recvs) { 5044 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5045 ptr_idxs = recv_buffer_idxs; 5046 for (i=0;i<n_recvs;i++) { 5047 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5048 new_local_type_private = MATAIJ_PRIVATE; 5049 break; 5050 } 5051 ptr_idxs += olengths_idxs[i]; 5052 } 5053 switch (new_local_type_private) { 5054 case MATDENSE_PRIVATE: 5055 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5056 new_local_type = MATSEQAIJ; 5057 bs = 1; 5058 } else { /* if I receive only 1 dense matrix */ 5059 new_local_type = MATSEQDENSE; 5060 bs = 1; 5061 } 5062 break; 5063 case MATAIJ_PRIVATE: 5064 new_local_type = MATSEQAIJ; 5065 bs = 1; 5066 break; 5067 case MATBAIJ_PRIVATE: 5068 new_local_type = MATSEQBAIJ; 5069 break; 5070 case MATSBAIJ_PRIVATE: 5071 new_local_type = MATSEQSBAIJ; 5072 break; 5073 default: 5074 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5075 break; 5076 } 5077 } else { /* by default, new_local_type is seqdense */ 5078 new_local_type = MATSEQDENSE; 5079 bs = 1; 5080 } 5081 5082 /* create MATIS object if needed */ 5083 if (reuse == MAT_INITIAL_MATRIX) { 5084 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5085 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5086 } else { 5087 /* it also destroys the local matrices */ 5088 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5089 } 5090 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5091 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5092 5093 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5094 5095 /* Global to local map of received indices */ 5096 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5097 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5098 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5099 5100 /* restore attributes -> type of incoming data and its size */ 5101 buf_size_idxs = 0; 5102 for (i=0;i<n_recvs;i++) { 5103 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5104 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5105 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5106 } 5107 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5108 5109 /* set preallocation */ 5110 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5111 if (!newisdense) { 5112 PetscInt *new_local_nnz=0; 5113 5114 ptr_vals = recv_buffer_vals; 5115 ptr_idxs = recv_buffer_idxs_local; 5116 if (n_recvs) { 5117 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5118 } 5119 for (i=0;i<n_recvs;i++) { 5120 PetscInt j; 5121 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5122 for (j=0;j<*(ptr_idxs+1);j++) { 5123 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5124 } 5125 } else { 5126 /* TODO */ 5127 } 5128 ptr_idxs += olengths_idxs[i]; 5129 } 5130 if (new_local_nnz) { 5131 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5132 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5133 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5134 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5135 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5136 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5137 } else { 5138 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5139 } 5140 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5141 } else { 5142 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5143 } 5144 5145 /* set values */ 5146 ptr_vals = recv_buffer_vals; 5147 ptr_idxs = recv_buffer_idxs_local; 5148 for (i=0;i<n_recvs;i++) { 5149 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5150 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5151 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5152 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5153 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5154 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5155 } else { 5156 /* TODO */ 5157 } 5158 ptr_idxs += olengths_idxs[i]; 5159 ptr_vals += olengths_vals[i]; 5160 } 5161 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5162 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5163 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5164 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5165 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5166 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5167 5168 #if 0 5169 if (!restrict_comm) { /* check */ 5170 Vec lvec,rvec; 5171 PetscReal infty_error; 5172 5173 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5174 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5175 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5176 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5177 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5178 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5179 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5180 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5181 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5182 } 5183 #endif 5184 5185 /* assemble new additional is (if any) */ 5186 if (nis) { 5187 PetscInt **temp_idxs,*count_is,j,psum; 5188 5189 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5190 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5191 ptr_idxs = recv_buffer_idxs_is; 5192 psum = 0; 5193 for (i=0;i<n_recvs;i++) { 5194 for (j=0;j<nis;j++) { 5195 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5196 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5197 psum += plen; 5198 ptr_idxs += plen+1; /* shift pointer to received data */ 5199 } 5200 } 5201 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5202 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5203 for (i=1;i<nis;i++) { 5204 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5205 } 5206 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5207 ptr_idxs = recv_buffer_idxs_is; 5208 for (i=0;i<n_recvs;i++) { 5209 for (j=0;j<nis;j++) { 5210 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5211 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5212 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5213 ptr_idxs += plen+1; /* shift pointer to received data */ 5214 } 5215 } 5216 for (i=0;i<nis;i++) { 5217 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5218 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5219 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5220 } 5221 ierr = PetscFree(count_is);CHKERRQ(ierr); 5222 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5223 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5224 } 5225 /* free workspace */ 5226 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5227 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5228 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5229 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5230 if (isdense) { 5231 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5232 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5233 } else { 5234 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5235 } 5236 if (nis) { 5237 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5238 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5239 } 5240 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5241 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5242 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5243 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5244 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5245 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5246 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5247 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5248 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5249 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5250 ierr = PetscFree(onodes);CHKERRQ(ierr); 5251 if (nis) { 5252 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5253 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5254 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5255 } 5256 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5257 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5258 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5259 for (i=0;i<nis;i++) { 5260 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5261 } 5262 *mat_n = NULL; 5263 } 5264 PetscFunctionReturn(0); 5265 } 5266 5267 /* temporary hack into ksp private data structure */ 5268 #include <petsc/private/kspimpl.h> 5269 5270 #undef __FUNCT__ 5271 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5272 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5273 { 5274 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5275 PC_IS *pcis = (PC_IS*)pc->data; 5276 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5277 MatNullSpace CoarseNullSpace=NULL; 5278 ISLocalToGlobalMapping coarse_islg; 5279 IS coarse_is,*isarray; 5280 PetscInt i,im_active=-1,active_procs=-1; 5281 PetscInt nis,nisdofs,nisneu,nisvert; 5282 PC pc_temp; 5283 PCType coarse_pc_type; 5284 KSPType coarse_ksp_type; 5285 PetscBool multilevel_requested,multilevel_allowed; 5286 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5287 Mat t_coarse_mat_is; 5288 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5289 PetscMPIInt all_procs; 5290 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5291 PetscBool compute_vecs = PETSC_FALSE; 5292 PetscScalar *array; 5293 PetscErrorCode ierr; 5294 5295 PetscFunctionBegin; 5296 /* Assign global numbering to coarse dofs */ 5297 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5298 PetscInt ocoarse_size; 5299 compute_vecs = PETSC_TRUE; 5300 ocoarse_size = pcbddc->coarse_size; 5301 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5302 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5303 /* see if we can avoid some work */ 5304 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5305 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5306 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5307 PC pc; 5308 PetscBool isbddc; 5309 5310 /* temporary workaround since PCBDDC does not have a reset method so far */ 5311 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5312 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5313 if (isbddc) { 5314 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5315 } else { 5316 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5317 } 5318 coarse_reuse = PETSC_FALSE; 5319 } else { /* we can safely reuse already computed coarse matrix */ 5320 coarse_reuse = PETSC_TRUE; 5321 } 5322 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5323 coarse_reuse = PETSC_FALSE; 5324 } 5325 /* reset any subassembling information */ 5326 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5327 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5328 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5329 coarse_reuse = PETSC_TRUE; 5330 } 5331 5332 /* count "active" (i.e. with positive local size) and "void" processes */ 5333 im_active = !!(pcis->n); 5334 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5335 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5336 void_procs = all_procs-active_procs; 5337 csin_type_simple = PETSC_TRUE; 5338 redist = PETSC_FALSE; 5339 if (pcbddc->current_level && void_procs) { 5340 csin_ml = PETSC_TRUE; 5341 ncoarse_ml = void_procs; 5342 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5343 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5344 csin_ds = PETSC_TRUE; 5345 ncoarse_ds = pcbddc->redistribute_coarse; 5346 redist = PETSC_TRUE; 5347 } else { 5348 csin_ds = PETSC_TRUE; 5349 ncoarse_ds = active_procs; 5350 redist = PETSC_TRUE; 5351 } 5352 } else { 5353 csin_ml = PETSC_FALSE; 5354 ncoarse_ml = all_procs; 5355 if (void_procs) { 5356 csin_ds = PETSC_TRUE; 5357 ncoarse_ds = void_procs; 5358 csin_type_simple = PETSC_FALSE; 5359 } else { 5360 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5361 csin_ds = PETSC_TRUE; 5362 ncoarse_ds = pcbddc->redistribute_coarse; 5363 redist = PETSC_TRUE; 5364 } else { 5365 csin_ds = PETSC_FALSE; 5366 ncoarse_ds = all_procs; 5367 } 5368 } 5369 } 5370 5371 /* 5372 test if we can go multilevel: three conditions must be satisfied: 5373 - we have not exceeded the number of levels requested 5374 - we can actually subassemble the active processes 5375 - we can find a suitable number of MPI processes where we can place the subassembled problem 5376 */ 5377 multilevel_allowed = PETSC_FALSE; 5378 multilevel_requested = PETSC_FALSE; 5379 if (pcbddc->current_level < pcbddc->max_levels) { 5380 multilevel_requested = PETSC_TRUE; 5381 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5382 multilevel_allowed = PETSC_FALSE; 5383 } else { 5384 multilevel_allowed = PETSC_TRUE; 5385 } 5386 } 5387 /* determine number of process partecipating to coarse solver */ 5388 if (multilevel_allowed) { 5389 ncoarse = ncoarse_ml; 5390 csin = csin_ml; 5391 redist = PETSC_FALSE; 5392 } else { 5393 ncoarse = ncoarse_ds; 5394 csin = csin_ds; 5395 } 5396 5397 /* creates temporary l2gmap and IS for coarse indexes */ 5398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5399 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5400 5401 /* creates temporary MATIS object for coarse matrix */ 5402 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5403 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5404 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5405 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5406 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5407 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5408 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5409 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5410 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5411 5412 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5413 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5414 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5415 const PetscInt *idxs; 5416 ISLocalToGlobalMapping tmap; 5417 5418 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5419 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5420 /* allocate space for temporary storage */ 5421 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5422 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5423 /* allocate for IS array */ 5424 nisdofs = pcbddc->n_ISForDofsLocal; 5425 nisneu = !!pcbddc->NeumannBoundariesLocal; 5426 nisvert = 0; 5427 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5428 nisvert = 1; 5429 } 5430 nis = nisdofs + nisneu + nisvert; 5431 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5432 /* dofs splitting */ 5433 for (i=0;i<nisdofs;i++) { 5434 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5435 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5436 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5437 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5438 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5439 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5440 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5441 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5442 } 5443 /* neumann boundaries */ 5444 if (pcbddc->NeumannBoundariesLocal) { 5445 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5446 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5447 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5448 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5449 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5450 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5451 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5452 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5453 } 5454 /* primal vertices (benign) */ 5455 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5456 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5457 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5458 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5459 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5460 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5461 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5462 } 5463 /* free memory */ 5464 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5465 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5466 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5467 } else { 5468 nis = 0; 5469 nisdofs = 0; 5470 nisneu = 0; 5471 nisvert = 0; 5472 isarray = NULL; 5473 } 5474 /* destroy no longer needed map */ 5475 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5476 5477 /* restrict on coarse candidates (if needed) */ 5478 coarse_mat_is = NULL; 5479 if (csin) { 5480 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5481 if (redist) { 5482 PetscMPIInt rank; 5483 PetscInt spc,n_spc_p1,dest[1],destsize; 5484 5485 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5486 spc = active_procs/ncoarse; 5487 n_spc_p1 = active_procs%ncoarse; 5488 if (im_active) { 5489 destsize = 1; 5490 if (rank > n_spc_p1*(spc+1)-1) { 5491 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5492 } else { 5493 dest[0] = rank/(spc+1); 5494 } 5495 } else { 5496 destsize = 0; 5497 } 5498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5499 } else if (csin_type_simple) { 5500 PetscMPIInt rank; 5501 PetscInt issize,isidx; 5502 5503 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5504 if (im_active) { 5505 issize = 1; 5506 isidx = (PetscInt)rank; 5507 } else { 5508 issize = 0; 5509 isidx = -1; 5510 } 5511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5512 } else { /* get a suitable subassembling pattern from MATIS code */ 5513 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5514 } 5515 5516 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5517 if (!redist || ncoarse <= void_procs) { 5518 PetscInt ncoarse_cand,tissize,*nisindices; 5519 PetscInt *coarse_candidates; 5520 const PetscInt* tisindices; 5521 5522 /* get coarse candidates' ranks in pc communicator */ 5523 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5524 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5525 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5526 if (!coarse_candidates[i]) { 5527 coarse_candidates[ncoarse_cand++]=i; 5528 } 5529 } 5530 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5531 5532 5533 if (pcbddc->dbg_flag) { 5534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5536 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5537 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5538 for (i=0;i<ncoarse_cand;i++) { 5539 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5540 } 5541 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5542 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5543 } 5544 /* shift the pattern on coarse candidates */ 5545 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5546 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5547 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5548 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5549 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5550 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5551 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5552 } 5553 if (pcbddc->dbg_flag) { 5554 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5555 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5556 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5557 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5558 } 5559 } 5560 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5561 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5562 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5563 } else { /* this is the last level, so use just receiving processes in subcomm */ 5564 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5565 } 5566 } else { 5567 if (pcbddc->dbg_flag) { 5568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5571 } 5572 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5573 coarse_mat_is = t_coarse_mat_is; 5574 } 5575 5576 /* create local to global scatters for coarse problem */ 5577 if (compute_vecs) { 5578 PetscInt lrows; 5579 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5580 if (coarse_mat_is) { 5581 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5582 } else { 5583 lrows = 0; 5584 } 5585 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5586 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5587 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5588 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5589 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5590 } 5591 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5592 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5593 5594 /* set defaults for coarse KSP and PC */ 5595 if (multilevel_allowed) { 5596 coarse_ksp_type = KSPRICHARDSON; 5597 coarse_pc_type = PCBDDC; 5598 } else { 5599 coarse_ksp_type = KSPPREONLY; 5600 coarse_pc_type = PCREDUNDANT; 5601 } 5602 5603 /* print some info if requested */ 5604 if (pcbddc->dbg_flag) { 5605 if (!multilevel_allowed) { 5606 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5607 if (multilevel_requested) { 5608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5609 } else if (pcbddc->max_levels) { 5610 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5611 } 5612 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5613 } 5614 } 5615 5616 /* create the coarse KSP object only once with defaults */ 5617 if (coarse_mat_is) { 5618 MatReuse coarse_mat_reuse; 5619 PetscViewer dbg_viewer = NULL; 5620 if (pcbddc->dbg_flag) { 5621 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5622 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5623 } 5624 if (!pcbddc->coarse_ksp) { 5625 char prefix[256],str_level[16]; 5626 size_t len; 5627 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5628 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5629 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5630 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5631 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5632 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5633 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5634 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5635 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5636 /* prefix */ 5637 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5638 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5639 if (!pcbddc->current_level) { 5640 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5641 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5642 } else { 5643 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5644 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5645 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5646 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5647 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5648 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5649 } 5650 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5651 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5652 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5653 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5654 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5655 /* allow user customization */ 5656 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5657 } 5658 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5659 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5660 if (nisdofs) { 5661 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5662 for (i=0;i<nisdofs;i++) { 5663 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5664 } 5665 } 5666 if (nisneu) { 5667 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5668 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5669 } 5670 if (nisvert) { 5671 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5672 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5673 } 5674 5675 /* get some info after set from options */ 5676 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5677 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5678 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5679 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5680 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5681 isbddc = PETSC_FALSE; 5682 } 5683 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5684 if (isredundant) { 5685 KSP inner_ksp; 5686 PC inner_pc; 5687 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5688 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5689 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5690 } 5691 5692 /* assemble coarse matrix */ 5693 if (coarse_reuse) { 5694 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5695 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5696 coarse_mat_reuse = MAT_REUSE_MATRIX; 5697 } else { 5698 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5699 } 5700 if (isbddc || isnn) { 5701 if (isbddc) { /* currently there's no API for this */ 5702 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5703 pcbddc->detect_disconnected = PETSC_TRUE; 5704 } 5705 if (pcbddc->coarsening_ratio > 1) { 5706 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5707 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5708 if (pcbddc->dbg_flag) { 5709 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5710 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5711 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5712 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5713 } 5714 } 5715 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5716 } else { 5717 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5718 coarse_mat = coarse_mat_is; 5719 } 5720 } else { 5721 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5722 } 5723 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5724 5725 /* propagate symmetry info of coarse matrix */ 5726 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5727 if (pc->pmat->symmetric_set) { 5728 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5729 } 5730 if (pc->pmat->hermitian_set) { 5731 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5732 } 5733 if (pc->pmat->spd_set) { 5734 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5735 } 5736 /* set operators */ 5737 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5738 if (pcbddc->dbg_flag) { 5739 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5740 } 5741 } else { /* processes non partecipating to coarse solver (if any) */ 5742 coarse_mat = 0; 5743 } 5744 ierr = PetscFree(isarray);CHKERRQ(ierr); 5745 #if 0 5746 { 5747 PetscViewer viewer; 5748 char filename[256]; 5749 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5750 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5751 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5752 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5753 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5754 } 5755 #endif 5756 5757 /* Compute coarse null space (special handling by BDDC only) */ 5758 #if 0 5759 if (pcbddc->NullSpace) { 5760 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5761 } 5762 #endif 5763 /* hack */ 5764 if (pcbddc->coarse_ksp) { 5765 Vec crhs,csol; 5766 5767 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5768 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5769 if (!csol) { 5770 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5771 } 5772 if (!crhs) { 5773 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5774 } 5775 } 5776 5777 /* compute null space for coarse solver if the benign trick has been requested */ 5778 if (pcbddc->benign_null) { 5779 5780 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5781 for (i=0;i<pcbddc->benign_n;i++) { 5782 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5783 } 5784 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5785 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5786 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5787 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5788 if (coarse_mat) { 5789 Vec nullv; 5790 PetscScalar *array,*array2; 5791 PetscInt nl; 5792 5793 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5794 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5795 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5796 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5797 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5798 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5799 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5800 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5801 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5802 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5803 } 5804 } 5805 5806 if (pcbddc->coarse_ksp) { 5807 PetscBool ispreonly; 5808 5809 if (CoarseNullSpace) { 5810 PetscBool isnull; 5811 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5812 if (0) { 5813 if (isbddc && !pcbddc->benign_saddle_point) { 5814 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5815 } else { 5816 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5817 } 5818 } else { 5819 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5820 } 5821 } 5822 /* setup coarse ksp */ 5823 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5824 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5825 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5826 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5827 KSP check_ksp; 5828 KSPType check_ksp_type; 5829 PC check_pc; 5830 Vec check_vec,coarse_vec; 5831 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5832 PetscInt its; 5833 PetscBool compute_eigs; 5834 PetscReal *eigs_r,*eigs_c; 5835 PetscInt neigs; 5836 const char *prefix; 5837 5838 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5839 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5840 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5841 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5842 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5843 if (ispreonly) { 5844 check_ksp_type = KSPPREONLY; 5845 compute_eigs = PETSC_FALSE; 5846 } else { 5847 check_ksp_type = KSPGMRES; 5848 compute_eigs = PETSC_TRUE; 5849 } 5850 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5851 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5852 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5853 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5854 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5855 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5856 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5857 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5858 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5859 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5860 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5861 /* create random vec */ 5862 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5863 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5864 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5865 if (CoarseNullSpace) { 5866 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5867 } 5868 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5869 /* solve coarse problem */ 5870 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5871 if (CoarseNullSpace) { 5872 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5873 } 5874 /* set eigenvalue estimation if preonly has not been requested */ 5875 if (compute_eigs) { 5876 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5877 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5878 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5879 lambda_max = eigs_r[neigs-1]; 5880 lambda_min = eigs_r[0]; 5881 if (pcbddc->use_coarse_estimates) { 5882 if (lambda_max>lambda_min) { 5883 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5884 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5885 } 5886 } 5887 } 5888 5889 /* check coarse problem residual error */ 5890 if (pcbddc->dbg_flag) { 5891 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5892 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5893 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5894 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5895 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5896 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5897 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5898 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5899 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5900 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5901 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5902 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5903 if (CoarseNullSpace) { 5904 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5905 } 5906 if (compute_eigs) { 5907 PetscReal lambda_max_s,lambda_min_s; 5908 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5909 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5910 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5911 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5912 for (i=0;i<neigs;i++) { 5913 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5914 } 5915 } 5916 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5917 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5918 } 5919 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5920 if (compute_eigs) { 5921 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5922 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5923 } 5924 } 5925 } 5926 /* print additional info */ 5927 if (pcbddc->dbg_flag) { 5928 /* waits until all processes reaches this point */ 5929 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5930 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5931 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5932 } 5933 5934 /* free memory */ 5935 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5936 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5937 PetscFunctionReturn(0); 5938 } 5939 5940 #undef __FUNCT__ 5941 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5942 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5943 { 5944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5945 PC_IS* pcis = (PC_IS*)pc->data; 5946 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5947 IS subset,subset_mult,subset_n; 5948 PetscInt local_size,coarse_size=0; 5949 PetscInt *local_primal_indices=NULL; 5950 const PetscInt *t_local_primal_indices; 5951 PetscErrorCode ierr; 5952 5953 PetscFunctionBegin; 5954 /* Compute global number of coarse dofs */ 5955 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5956 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5957 } 5958 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5959 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5960 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5961 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5962 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5963 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5964 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5965 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5966 if (local_size != pcbddc->local_primal_size) { 5967 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5968 } 5969 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5970 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5971 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5972 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5973 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5974 5975 /* check numbering */ 5976 if (pcbddc->dbg_flag) { 5977 PetscScalar coarsesum,*array,*array2; 5978 PetscInt i; 5979 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5980 5981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5984 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5985 /* counter */ 5986 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5987 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5988 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5989 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5990 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5991 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5992 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5993 for (i=0;i<pcbddc->local_primal_size;i++) { 5994 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5995 } 5996 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5997 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5998 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5999 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6000 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6001 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6002 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6003 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6004 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6005 for (i=0;i<pcis->n;i++) { 6006 if (array[i] != 0.0 && array[i] != array2[i]) { 6007 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6008 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6009 set_error = PETSC_TRUE; 6010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6011 } 6012 } 6013 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6014 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6015 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6016 for (i=0;i<pcis->n;i++) { 6017 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6018 } 6019 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6020 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6021 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6022 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6023 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6025 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6026 PetscInt *gidxs; 6027 6028 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6029 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6033 for (i=0;i<pcbddc->local_primal_size;i++) { 6034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6035 } 6036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6037 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6038 } 6039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6040 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6041 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6042 } 6043 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6044 /* get back data */ 6045 *coarse_size_n = coarse_size; 6046 *local_primal_indices_n = local_primal_indices; 6047 PetscFunctionReturn(0); 6048 } 6049 6050 #undef __FUNCT__ 6051 #define __FUNCT__ "PCBDDCGlobalToLocal" 6052 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6053 { 6054 IS localis_t; 6055 PetscInt i,lsize,*idxs,n; 6056 PetscScalar *vals; 6057 PetscErrorCode ierr; 6058 6059 PetscFunctionBegin; 6060 /* get indices in local ordering exploiting local to global map */ 6061 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6062 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6063 for (i=0;i<lsize;i++) vals[i] = 1.0; 6064 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6065 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6066 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6067 if (idxs) { /* multilevel guard */ 6068 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6069 } 6070 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6071 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6072 ierr = PetscFree(vals);CHKERRQ(ierr); 6073 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6074 /* now compute set in local ordering */ 6075 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6076 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6077 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6078 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6079 for (i=0,lsize=0;i<n;i++) { 6080 if (PetscRealPart(vals[i]) > 0.5) { 6081 lsize++; 6082 } 6083 } 6084 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6085 for (i=0,lsize=0;i<n;i++) { 6086 if (PetscRealPart(vals[i]) > 0.5) { 6087 idxs[lsize++] = i; 6088 } 6089 } 6090 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6091 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6092 *localis = localis_t; 6093 PetscFunctionReturn(0); 6094 } 6095 6096 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6097 #undef __FUNCT__ 6098 #define __FUNCT__ "PCBDDCMatMult_Private" 6099 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6100 { 6101 PCBDDCChange_ctx change_ctx; 6102 PetscErrorCode ierr; 6103 6104 PetscFunctionBegin; 6105 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6106 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6107 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6108 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6109 PetscFunctionReturn(0); 6110 } 6111 6112 #undef __FUNCT__ 6113 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6114 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6115 { 6116 PCBDDCChange_ctx change_ctx; 6117 PetscErrorCode ierr; 6118 6119 PetscFunctionBegin; 6120 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6121 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6122 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6123 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6124 PetscFunctionReturn(0); 6125 } 6126 6127 #undef __FUNCT__ 6128 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6129 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6130 { 6131 PC_IS *pcis=(PC_IS*)pc->data; 6132 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6133 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6134 Mat S_j; 6135 PetscInt *used_xadj,*used_adjncy; 6136 PetscBool free_used_adj; 6137 PetscErrorCode ierr; 6138 6139 PetscFunctionBegin; 6140 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6141 free_used_adj = PETSC_FALSE; 6142 if (pcbddc->sub_schurs_layers == -1) { 6143 used_xadj = NULL; 6144 used_adjncy = NULL; 6145 } else { 6146 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6147 used_xadj = pcbddc->mat_graph->xadj; 6148 used_adjncy = pcbddc->mat_graph->adjncy; 6149 } else if (pcbddc->computed_rowadj) { 6150 used_xadj = pcbddc->mat_graph->xadj; 6151 used_adjncy = pcbddc->mat_graph->adjncy; 6152 } else { 6153 PetscBool flg_row=PETSC_FALSE; 6154 const PetscInt *xadj,*adjncy; 6155 PetscInt nvtxs; 6156 6157 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6158 if (flg_row) { 6159 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6160 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6161 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6162 free_used_adj = PETSC_TRUE; 6163 } else { 6164 pcbddc->sub_schurs_layers = -1; 6165 used_xadj = NULL; 6166 used_adjncy = NULL; 6167 } 6168 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6169 } 6170 } 6171 6172 /* setup sub_schurs data */ 6173 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6174 if (!sub_schurs->schur_explicit) { 6175 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6176 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6177 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6178 } else { 6179 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6180 PetscBool isseqaij; 6181 PetscInt benign_n; 6182 6183 if (!pcbddc->use_vertices && reuse_solvers) { 6184 PetscInt n_vertices; 6185 6186 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6187 reuse_solvers = (PetscBool)!n_vertices; 6188 } 6189 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6190 if (!isseqaij) { 6191 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6192 if (matis->A == pcbddc->local_mat) { 6193 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6194 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6195 } else { 6196 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6197 } 6198 } 6199 if (!pcbddc->benign_change_explicit) { 6200 benign_n = pcbddc->benign_n; 6201 } else { 6202 benign_n = 0; 6203 } 6204 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6205 /* If the user defines additional constraints, we import them here. 6206 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6207 if (pcbddc->adaptive_userdefined) { 6208 PC_IS *pcisf; 6209 PC_BDDC *pcbddcf; 6210 PC pcf; 6211 6212 if (pcbddc->sub_schurs_rebuild) { 6213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6214 } 6215 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6216 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6217 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6218 /* hacks */ 6219 pcisf = (PC_IS*)pcf->data; 6220 pcbddcf = (PC_BDDC*)pcf->data; 6221 *pcisf = *pcis; 6222 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6223 pcbddcf->mat_graph = pcbddc->mat_graph; 6224 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6225 pcbddcf->sub_schurs = sub_schurs; 6226 pcbddcf->use_faces = PETSC_TRUE; 6227 pcbddcf->use_change_of_basis = PETSC_TRUE; 6228 pcbddcf->use_change_on_faces = PETSC_TRUE; 6229 pcbddcf->use_deluxe_scaling = PETSC_TRUE; 6230 pcbddcf->deluxe_zerorows = PETSC_TRUE; 6231 pcbddcf->fake_change = PETSC_TRUE; 6232 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6233 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6234 pcbddcf->sub_schurs = NULL; 6235 ierr = MatDestroy(&pcbddcf->ConstraintMatrix);CHKERRQ(ierr); 6236 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6237 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6238 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6239 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6240 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6241 pcf->ops->destroy = NULL; 6242 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6243 } 6244 } 6245 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6246 6247 /* free adjacency */ 6248 if (free_used_adj) { 6249 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6250 } 6251 PetscFunctionReturn(0); 6252 } 6253 6254 #undef __FUNCT__ 6255 #define __FUNCT__ "PCBDDCInitSubSchurs" 6256 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6257 { 6258 PC_IS *pcis=(PC_IS*)pc->data; 6259 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6260 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6261 PCBDDCGraph graph; 6262 PetscErrorCode ierr; 6263 6264 PetscFunctionBegin; 6265 /* attach interface graph for determining subsets */ 6266 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6267 IS verticesIS,verticescomm; 6268 PetscInt vsize,*idxs; 6269 6270 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6271 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6272 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6273 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6274 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6275 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6276 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6277 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6278 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6279 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6280 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6281 /* 6282 if (pcbddc->dbg_flag) { 6283 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6284 } 6285 */ 6286 } else { 6287 graph = pcbddc->mat_graph; 6288 } 6289 6290 /* sub_schurs init */ 6291 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6292 6293 /* free graph struct */ 6294 if (pcbddc->sub_schurs_rebuild) { 6295 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6296 } 6297 PetscFunctionReturn(0); 6298 } 6299 6300 #undef __FUNCT__ 6301 #define __FUNCT__ "PCBDDCCheckOperator" 6302 PetscErrorCode PCBDDCCheckOperator(PC pc) 6303 { 6304 PC_IS *pcis=(PC_IS*)pc->data; 6305 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6306 PetscErrorCode ierr; 6307 6308 PetscFunctionBegin; 6309 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6310 IS zerodiag = NULL; 6311 Mat S_j,B0_B=NULL; 6312 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6313 PetscScalar *p0_check,*array,*array2; 6314 PetscReal norm; 6315 PetscInt i; 6316 6317 /* B0 and B0_B */ 6318 if (zerodiag) { 6319 IS dummy; 6320 6321 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6322 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6323 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6324 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6325 } 6326 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6327 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6328 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6329 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6330 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6331 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6332 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6333 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6334 /* S_j */ 6335 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6336 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6337 6338 /* mimic vector in \widetilde{W}_\Gamma */ 6339 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6340 /* continuous in primal space */ 6341 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6342 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6343 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6344 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6345 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6346 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6347 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6348 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6349 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6350 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6351 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6352 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6353 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6354 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6355 6356 /* assemble rhs for coarse problem */ 6357 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6358 /* local with Schur */ 6359 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6360 if (zerodiag) { 6361 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6362 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6363 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6364 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6365 } 6366 /* sum on primal nodes the local contributions */ 6367 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6368 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6369 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6370 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6371 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6372 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6373 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6374 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6375 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6377 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6378 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6379 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6380 /* scale primal nodes (BDDC sums contibutions) */ 6381 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6382 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6383 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6384 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6385 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6386 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6387 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6388 /* global: \widetilde{B0}_B w_\Gamma */ 6389 if (zerodiag) { 6390 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6391 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6392 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6393 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6394 } 6395 /* BDDC */ 6396 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6397 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6398 6399 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6400 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6401 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6402 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6403 for (i=0;i<pcbddc->benign_n;i++) { 6404 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6405 } 6406 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6407 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6408 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6409 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6410 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6411 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6412 } 6413 PetscFunctionReturn(0); 6414 } 6415