1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 /* no-net-flux */ 650 if (pcbddc->benign_compute_nonetflux) { 651 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 652 MatNullSpace near_null_space; 653 ISLocalToGlobalMapping rmap; 654 Vec quad_vec; 655 PetscScalar *pvals; 656 PetscInt i,np,*dummyins; 657 658 /* create vector to hold quadrature weights */ 659 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 660 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 661 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 662 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 663 664 /* compute B^{(i)T} * 1_p */ 665 np = 0; 666 if (zerodiag) { 667 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 668 } else if (pressures) { 669 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 670 } 671 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 672 for (i=0;i<np;i++) pvals[i] = 1.; 673 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 674 if (np) { 675 const PetscInt *pidxs; 676 677 if (zerodiag) { 678 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 679 } else if (pressures) { 680 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 681 } 682 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 683 if (zerodiag) { 684 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 685 } else if (pressures) { 686 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 687 } 688 } 689 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 690 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 691 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 692 ierr = PetscFree(pvals);CHKERRQ(ierr); 693 694 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 695 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 696 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 697 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 698 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 699 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 700 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 701 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 702 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 703 for (i=0;i<matis->sf_nleaves;i++) { 704 if (PetscGlobalRank != matis->sf_leafdata[i]) { 705 pvals[i] = 0.; 706 } 707 } 708 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 709 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 710 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 711 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 712 ierr = PetscFree(dummyins);CHKERRQ(ierr); 713 714 /* assembly quadrature vec and attach near null space to pmat */ 715 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 716 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 717 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 718 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 719 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 720 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 721 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 722 } 723 724 /* change of basis and p0 dofs */ 725 if (has_null_pressures) { 726 IS zerodiagc; 727 const PetscInt *idxs,*idxsc; 728 PetscInt i,s,*nnz; 729 730 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 731 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 732 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 733 /* local change of basis for pressures */ 734 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 735 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 736 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 737 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 738 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 739 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 740 for (i=0;i<pcbddc->benign_n;i++) { 741 PetscInt nzs,j; 742 743 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 744 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 745 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 746 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 747 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 748 } 749 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 750 ierr = PetscFree(nnz);CHKERRQ(ierr); 751 /* set identity on velocities */ 752 for (i=0;i<n-nz;i++) { 753 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 754 } 755 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 756 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 757 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 758 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 759 /* set change on pressures */ 760 for (s=0;s<pcbddc->benign_n;s++) { 761 PetscScalar *array; 762 PetscInt nzs; 763 764 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 765 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 766 for (i=0;i<nzs-1;i++) { 767 PetscScalar vals[2]; 768 PetscInt cols[2]; 769 770 cols[0] = idxs[i]; 771 cols[1] = idxs[nzs-1]; 772 vals[0] = 1.; 773 vals[1] = 1.; 774 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 775 } 776 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 777 for (i=0;i<nzs-1;i++) array[i] = -1.; 778 array[nzs-1] = 1.; 779 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 780 /* store local idxs for p0 */ 781 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 782 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 783 ierr = PetscFree(array);CHKERRQ(ierr); 784 } 785 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 /* project if needed */ 788 if (pcbddc->benign_change_explicit) { 789 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 791 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 793 } 794 /* store global idxs for p0 */ 795 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 796 } 797 pcbddc->benign_zerodiag_subs = zerodiag_subs; 798 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 799 800 /* determines if the coarse solver will be singular or not */ 801 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 802 *zerodiaglocal = zerodiag; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 808 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 809 { 810 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 if (!pcbddc->benign_sf) { 815 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 816 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 817 } 818 if (get) { /* use SF to get values */ 819 PetscScalar *array; 820 821 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 822 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 823 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 824 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 825 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 826 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 828 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 835 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 836 { 837 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* TODO: add error checking 842 - avoid nested pop (or push) calls. 843 - cannot push before pop. 844 - cannot call this if pcbddc->local_mat is NULL 845 */ 846 if (!pcbddc->benign_n) { 847 PetscFunctionReturn(0); 848 } 849 if (pop) { 850 if (pcbddc->benign_change_explicit) { 851 IS is_p0; 852 MatReuse reuse; 853 854 /* extract B_0 */ 855 reuse = MAT_INITIAL_MATRIX; 856 if (pcbddc->benign_B0) { 857 reuse = MAT_REUSE_MATRIX; 858 } 859 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 861 /* remove rows and cols from local problem */ 862 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 863 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 864 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 866 } else { 867 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 868 PetscScalar *vals; 869 PetscInt i,n,*idxs_ins; 870 871 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 872 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 873 if (!pcbddc->benign_B0) { 874 PetscInt *nnz; 875 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 876 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 877 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 878 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 879 for (i=0;i<pcbddc->benign_n;i++) { 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 881 nnz[i] = n - nnz[i]; 882 } 883 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 884 ierr = PetscFree(nnz);CHKERRQ(ierr); 885 } 886 887 for (i=0;i<pcbddc->benign_n;i++) { 888 PetscScalar *array; 889 PetscInt *idxs,j,nz,cum; 890 891 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 892 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 893 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 for (j=0;j<nz;j++) vals[j] = 1.; 895 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 896 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 897 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 898 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 899 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 900 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 901 cum = 0; 902 for (j=0;j<n;j++) { 903 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 904 vals[cum] = array[j]; 905 idxs_ins[cum] = j; 906 cum++; 907 } 908 } 909 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 916 } 917 } else { /* push */ 918 if (pcbddc->benign_change_explicit) { 919 PetscInt i; 920 921 for (i=0;i<pcbddc->benign_n;i++) { 922 PetscScalar *B0_vals; 923 PetscInt *B0_cols,B0_ncol; 924 925 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 926 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 927 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 928 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 929 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 930 } 931 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 } else { 934 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "PCBDDCAdaptiveSelection" 942 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 943 { 944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 945 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 946 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 947 PetscBLASInt *B_iwork,*B_ifail; 948 PetscScalar *work,lwork; 949 PetscScalar *St,*S,*eigv; 950 PetscScalar *Sarray,*Starray; 951 PetscReal *eigs,thresh; 952 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 953 PetscBool allocated_S_St; 954 #if defined(PETSC_USE_COMPLEX) 955 PetscReal *rwork; 956 #endif 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (!sub_schurs->schur_explicit) { 961 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 962 } 963 964 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 965 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 966 } 967 968 if (pcbddc->dbg_flag) { 969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 971 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 972 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 } 974 975 if (pcbddc->dbg_flag) { 976 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 977 } 978 979 /* max size of subsets */ 980 mss = 0; 981 for (i=0;i<sub_schurs->n_subs;i++) { 982 PetscInt subset_size; 983 984 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 985 mss = PetscMax(mss,subset_size); 986 } 987 988 /* min/max and threshold */ 989 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 990 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 991 nmax = PetscMax(nmin,nmax); 992 allocated_S_St = PETSC_FALSE; 993 if (nmin) { 994 allocated_S_St = PETSC_TRUE; 995 } 996 997 /* allocate lapack workspace */ 998 cum = cum2 = 0; 999 maxneigs = 0; 1000 for (i=0;i<sub_schurs->n_subs;i++) { 1001 PetscInt n,subset_size; 1002 1003 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1004 n = PetscMin(subset_size,nmax); 1005 cum += subset_size; 1006 cum2 += subset_size*n; 1007 maxneigs = PetscMax(maxneigs,n); 1008 } 1009 if (mss) { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_N = mss; 1013 PetscReal zero = 0.0; 1014 PetscReal eps = 0.0; /* dlamch? */ 1015 1016 B_lwork = -1; 1017 S = NULL; 1018 St = NULL; 1019 eigs = NULL; 1020 eigv = NULL; 1021 B_iwork = NULL; 1022 B_ifail = NULL; 1023 #if defined(PETSC_USE_COMPLEX) 1024 rwork = NULL; 1025 #endif 1026 thresh = 1.0; 1027 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1030 #else 1031 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1032 #endif 1033 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1035 } else { 1036 /* TODO */ 1037 } 1038 } else { 1039 lwork = 0; 1040 } 1041 1042 nv = 0; 1043 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1044 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1045 } 1046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1047 if (allocated_S_St) { 1048 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1049 } 1050 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1051 #if defined(PETSC_USE_COMPLEX) 1052 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1055 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1056 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1057 nv+cum,&pcbddc->adaptive_constraints_idxs, 1058 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1059 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1060 1061 maxneigs = 0; 1062 cum = cumarray = 0; 1063 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1064 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1065 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1066 const PetscInt *idxs; 1067 1068 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1069 for (cum=0;cum<nv;cum++) { 1070 pcbddc->adaptive_constraints_n[cum] = 1; 1071 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1072 pcbddc->adaptive_constraints_data[cum] = 1.0; 1073 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1074 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1075 } 1076 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1077 } 1078 1079 if (mss) { /* multilevel */ 1080 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1081 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1082 } 1083 1084 thresh = pcbddc->adaptive_threshold; 1085 for (i=0;i<sub_schurs->n_subs;i++) { 1086 const PetscInt *idxs; 1087 PetscReal infty = PETSC_MAX_REAL; 1088 PetscInt j,subset_size,eigs_start = 0; 1089 PetscBLASInt B_N; 1090 PetscBool same_data = PETSC_FALSE; 1091 1092 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1093 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1095 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1096 if (sub_schurs->is_hermitian) { 1097 PetscInt j,k; 1098 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1099 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 for (j=0;j<subset_size;j++) { 1103 for (k=j;k<subset_size;k++) { 1104 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1105 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1106 } 1107 } 1108 } else { 1109 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1110 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 S = Sarray + cumarray; 1114 St = Starray + cumarray; 1115 } 1116 /* see if we can save some work */ 1117 if (sub_schurs->n_subs == 1) { 1118 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1119 } 1120 1121 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1122 B_neigs = 0; 1123 } else { 1124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1125 PetscBLASInt B_itype = 1; 1126 PetscBLASInt B_IL, B_IU; 1127 PetscReal eps = -1.0; /* dlamch? */ 1128 PetscInt nmin_s; 1129 1130 if (pcbddc->dbg_flag) { 1131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1132 } 1133 1134 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1135 if (thresh > 1.+PETSC_SMALL) { 1136 1137 /* ask for eigenvalues larger than thresh */ 1138 #if defined(PETSC_USE_COMPLEX) 1139 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1140 #else 1141 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1142 #endif 1143 } else { 1144 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1145 B_IL = 1; 1146 #if defined(PETSC_USE_COMPLEX) 1147 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1148 #else 1149 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1150 #endif 1151 } 1152 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1153 if (B_ierr) { 1154 if (B_ierr < 0 ) { 1155 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1156 } else if (B_ierr <= B_N) { 1157 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1158 } else { 1159 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1160 } 1161 } 1162 1163 if (B_neigs > nmax) { 1164 if (pcbddc->dbg_flag) { 1165 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1166 } 1167 eigs_start = B_neigs -nmax; 1168 B_neigs = nmax; 1169 } 1170 1171 nmin_s = PetscMin(nmin,B_N); 1172 if (B_neigs < nmin_s) { 1173 PetscBLASInt B_neigs2; 1174 1175 B_IU = B_N - B_neigs; 1176 B_IL = B_N - nmin_s + 1; 1177 if (pcbddc->dbg_flag) { 1178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1179 } 1180 if (sub_schurs->is_hermitian) { 1181 PetscInt j; 1182 for (j=0;j<subset_size;j++) { 1183 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1184 } 1185 for (j=0;j<subset_size;j++) { 1186 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1187 } 1188 } else { 1189 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1190 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1193 #if defined(PETSC_USE_COMPLEX) 1194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1195 #else 1196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1197 #endif 1198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1199 B_neigs += B_neigs2; 1200 } 1201 if (B_ierr) { 1202 if (B_ierr < 0 ) { 1203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1204 } else if (B_ierr <= B_N) { 1205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1206 } else { 1207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1208 } 1209 } 1210 if (pcbddc->dbg_flag) { 1211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1212 for (j=0;j<B_neigs;j++) { 1213 if (eigs[j] == 0.0) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1217 } 1218 } 1219 } 1220 } else { 1221 /* TODO */ 1222 } 1223 } 1224 /* change the basis back to the original one */ 1225 if (sub_schurs->change) { 1226 Mat change,phi,phit; 1227 1228 if (pcbddc->dbg_flag > 1) { 1229 PetscInt ii; 1230 for (ii=0;ii<B_neigs;ii++) { 1231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1232 for (j=0;j<B_N;j++) { 1233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1234 } 1235 } 1236 } 1237 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1239 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1240 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1241 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1242 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1243 } 1244 maxneigs = PetscMax(B_neigs,maxneigs); 1245 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1246 if (B_neigs) { 1247 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1248 1249 if (pcbddc->dbg_flag > 1) { 1250 PetscInt ii; 1251 for (ii=0;ii<B_neigs;ii++) { 1252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1253 for (j=0;j<B_N;j++) { 1254 #if defined(PETSC_USE_COMPLEX) 1255 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1256 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1258 #else 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1260 #endif 1261 } 1262 } 1263 } 1264 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1265 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1266 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1267 cum++; 1268 } 1269 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1270 /* shift for next computation */ 1271 cumarray += subset_size*subset_size; 1272 } 1273 if (pcbddc->dbg_flag) { 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 } 1276 1277 if (mss) { 1278 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1279 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1280 /* destroy matrices (junk) */ 1281 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1282 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1283 } 1284 if (allocated_S_St) { 1285 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1288 #if defined(PETSC_USE_COMPLEX) 1289 ierr = PetscFree(rwork);CHKERRQ(ierr); 1290 #endif 1291 if (pcbddc->dbg_flag) { 1292 PetscInt maxneigs_r; 1293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "PCBDDCSetUpSolvers" 1301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1302 { 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PetscScalar *coarse_submat_vals; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1311 1312 /* Setup local neumann solver ksp_R */ 1313 /* PCBDDCSetUpLocalScatters should be called first! */ 1314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1315 1316 /* Change global null space passed in by the user if change of basis has been requested */ 1317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1319 } 1320 1321 /* 1322 Setup local correction and local part of coarse basis. 1323 Gives back the dense local part of the coarse matrix in column major ordering 1324 */ 1325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1326 1327 /* Compute total number of coarse nodes and setup coarse solver */ 1328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1329 1330 /* free */ 1331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "PCBDDCResetCustomization" 1337 PetscErrorCode PCBDDCResetCustomization(PC pc) 1338 { 1339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1346 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1348 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1349 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1350 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1351 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1353 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1354 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCResetTopography" 1360 PetscErrorCode PCBDDCResetTopography(PC pc) 1361 { 1362 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1363 PetscInt i; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1368 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1369 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1370 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1371 for (i=0;i<pcbddc->n_local_subs;i++) { 1372 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1375 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "PCBDDCResetSolvers" 1381 PetscErrorCode PCBDDCResetSolvers(PC pc) 1382 { 1383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1388 if (pcbddc->coarse_phi_B) { 1389 PetscScalar *array; 1390 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1391 ierr = PetscFree(array);CHKERRQ(ierr); 1392 } 1393 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1394 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1395 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1396 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1397 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1398 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1402 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1403 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1404 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1405 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1406 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1407 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1408 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1411 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1412 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1413 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1416 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1417 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1418 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1420 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1421 if (pcbddc->benign_zerodiag_subs) { 1422 PetscInt i; 1423 for (i=0;i<pcbddc->benign_n;i++) { 1424 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1434 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1435 { 1436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1437 PC_IS *pcis = (PC_IS*)pc->data; 1438 VecType impVecType; 1439 PetscInt n_constraints,n_R,old_size; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 if (!pcbddc->ConstraintMatrix) { 1444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1445 } 1446 /* get sizes */ 1447 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1448 n_R = pcis->n - pcbddc->n_vertices; 1449 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1450 /* local work vectors (try to avoid unneeded work)*/ 1451 /* R nodes */ 1452 old_size = -1; 1453 if (pcbddc->vec1_R) { 1454 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1455 } 1456 if (n_R != old_size) { 1457 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1458 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1459 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1460 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1461 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1462 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1463 } 1464 /* local primal dofs */ 1465 old_size = -1; 1466 if (pcbddc->vec1_P) { 1467 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1468 } 1469 if (pcbddc->local_primal_size != old_size) { 1470 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1471 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1472 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1474 } 1475 /* local explicit constraints */ 1476 old_size = -1; 1477 if (pcbddc->vec1_C) { 1478 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1479 } 1480 if (n_constraints && n_constraints != old_size) { 1481 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1483 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1484 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "PCBDDCSetUpCorrection" 1491 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1492 { 1493 PetscErrorCode ierr; 1494 /* pointers to pcis and pcbddc */ 1495 PC_IS* pcis = (PC_IS*)pc->data; 1496 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1497 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1498 /* submatrices of local problem */ 1499 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1500 /* submatrices of local coarse problem */ 1501 Mat S_VV,S_CV,S_VC,S_CC; 1502 /* working matrices */ 1503 Mat C_CR; 1504 /* additional working stuff */ 1505 PC pc_R; 1506 Mat F; 1507 Vec dummy_vec; 1508 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1509 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1510 PetscScalar *work; 1511 PetscInt *idx_V_B; 1512 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1513 PetscInt i,n_R,n_D,n_B; 1514 1515 /* some shortcuts to scalars */ 1516 PetscScalar one=1.0,m_one=-1.0; 1517 1518 PetscFunctionBegin; 1519 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1520 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1521 } 1522 1523 /* Set Non-overlapping dimensions */ 1524 n_vertices = pcbddc->n_vertices; 1525 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1526 n_B = pcis->n_B; 1527 n_D = pcis->n - n_B; 1528 n_R = pcis->n - n_vertices; 1529 1530 /* vertices in boundary numbering */ 1531 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1532 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1533 if (i != n_vertices) { 1534 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1535 } 1536 1537 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1538 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1540 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 1548 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1549 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1553 lda_rhs = n_R; 1554 need_benign_correction = PETSC_FALSE; 1555 if (isLU || isILU || isCHOL) { 1556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1557 } else if (sub_schurs->reuse_solver) { 1558 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1559 MatFactorType type; 1560 1561 F = reuse_solver->F; 1562 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1563 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1564 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1565 need_benign_correction = !!reuse_solver->benign_n; 1566 } else { 1567 F = NULL; 1568 } 1569 1570 /* allocate workspace */ 1571 n = 0; 1572 if (n_constraints) { 1573 n += lda_rhs*n_constraints; 1574 } 1575 if (n_vertices) { 1576 n = PetscMax(2*lda_rhs*n_vertices,n); 1577 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1578 } 1579 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1580 1581 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1582 dummy_vec = NULL; 1583 if (need_benign_correction && lda_rhs != n_R && F) { 1584 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1585 } 1586 1587 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1588 if (n_constraints) { 1589 Mat M1,M2,M3,C_B; 1590 IS is_aux; 1591 PetscScalar *array,*array2; 1592 1593 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1595 1596 /* Extract constraints on R nodes: C_{CR} */ 1597 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1598 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1599 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1600 1601 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1602 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1603 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1604 for (i=0;i<n_constraints;i++) { 1605 const PetscScalar *row_cmat_values; 1606 const PetscInt *row_cmat_indices; 1607 PetscInt size_of_constraint,j; 1608 1609 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1610 for (j=0;j<size_of_constraint;j++) { 1611 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1612 } 1613 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1614 } 1615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1616 if (F) { 1617 Mat B; 1618 1619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1620 if (need_benign_correction) { 1621 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1622 1623 /* rhs is already zero on interior dofs, no need to change the rhs */ 1624 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1625 } 1626 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1627 if (need_benign_correction) { 1628 PetscScalar *marr; 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1632 if (lda_rhs != n_R) { 1633 for (i=0;i<n_constraints;i++) { 1634 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1635 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1636 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1637 } 1638 } else { 1639 for (i=0;i<n_constraints;i++) { 1640 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1641 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 } 1644 } 1645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1646 } 1647 ierr = MatDestroy(&B);CHKERRQ(ierr); 1648 } else { 1649 PetscScalar *marr; 1650 1651 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1652 for (i=0;i<n_constraints;i++) { 1653 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1655 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1656 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 } 1661 if (!pcbddc->switch_static) { 1662 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1663 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1664 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1665 for (i=0;i<n_constraints;i++) { 1666 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1667 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1668 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1671 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1672 } 1673 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1674 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1675 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1676 } else { 1677 if (lda_rhs != n_R) { 1678 IS dummy; 1679 1680 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1681 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1682 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1683 } else { 1684 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1685 pcbddc->local_auxmat2 = local_auxmat2_R; 1686 } 1687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1688 } 1689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1694 if (isCHOL) { 1695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1696 } else { 1697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1698 } 1699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1705 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1706 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1709 } 1710 1711 /* Get submatrices from subdomain matrix */ 1712 if (n_vertices) { 1713 IS is_aux; 1714 1715 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1716 IS tis; 1717 1718 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1719 ierr = ISSort(tis);CHKERRQ(ierr); 1720 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1721 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1722 } else { 1723 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1724 } 1725 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1727 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1728 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1729 } 1730 1731 /* Matrix of coarse basis functions (local) */ 1732 if (pcbddc->coarse_phi_B) { 1733 PetscInt on_B,on_primal,on_D=n_D; 1734 if (pcbddc->coarse_phi_D) { 1735 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1736 } 1737 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1738 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1739 PetscScalar *marray; 1740 1741 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1742 ierr = PetscFree(marray);CHKERRQ(ierr); 1743 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1744 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1745 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1746 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1747 } 1748 } 1749 1750 if (!pcbddc->coarse_phi_B) { 1751 PetscScalar *marray; 1752 1753 n = n_B*pcbddc->local_primal_size; 1754 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1755 n += n_D*pcbddc->local_primal_size; 1756 } 1757 if (!pcbddc->symmetric_primal) { 1758 n *= 2; 1759 } 1760 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1762 n = n_B*pcbddc->local_primal_size; 1763 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1765 n += n_D*pcbddc->local_primal_size; 1766 } 1767 if (!pcbddc->symmetric_primal) { 1768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1770 n = n_B*pcbddc->local_primal_size; 1771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1772 } 1773 } else { 1774 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1775 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1776 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1777 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1778 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1779 } 1780 } 1781 } 1782 1783 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1784 p0_lidx_I = NULL; 1785 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1786 const PetscInt *idxs; 1787 1788 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1789 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1790 for (i=0;i<pcbddc->benign_n;i++) { 1791 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1792 } 1793 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 } 1795 1796 /* vertices */ 1797 if (n_vertices) { 1798 1799 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1800 1801 if (n_R) { 1802 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1803 PetscBLASInt B_N,B_one = 1; 1804 PetscScalar *x,*y; 1805 PetscBool isseqaij; 1806 1807 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1808 if (need_benign_correction) { 1809 ISLocalToGlobalMapping RtoN; 1810 IS is_p0; 1811 PetscInt *idxs_p0,n; 1812 1813 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1814 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1815 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1816 if (n != pcbddc->benign_n) { 1817 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1818 } 1819 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1820 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1821 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1822 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1823 } 1824 1825 if (lda_rhs == n_R) { 1826 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1827 } else { 1828 PetscScalar *av,*array; 1829 const PetscInt *xadj,*adjncy; 1830 PetscInt n; 1831 PetscBool flg_row; 1832 1833 array = work+lda_rhs*n_vertices; 1834 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1835 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1836 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1837 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1838 for (i=0;i<n;i++) { 1839 PetscInt j; 1840 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1841 } 1842 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1845 } 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1847 if (need_benign_correction) { 1848 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1849 PetscScalar *marr; 1850 1851 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1852 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1853 1854 | 0 0 0 | (V) 1855 L = | 0 0 -1 | (P-p0) 1856 | 0 0 -1 | (p0) 1857 1858 */ 1859 for (i=0;i<reuse_solver->benign_n;i++) { 1860 const PetscScalar *vals; 1861 const PetscInt *idxs,*idxs_zero; 1862 PetscInt n,j,nz; 1863 1864 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1865 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1866 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1867 for (j=0;j<n;j++) { 1868 PetscScalar val = vals[j]; 1869 PetscInt k,col = idxs[j]; 1870 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1871 } 1872 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1874 } 1875 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1876 } 1877 if (F) { 1878 /* need to correct the rhs */ 1879 if (need_benign_correction) { 1880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1881 PetscScalar *marr; 1882 1883 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1884 if (lda_rhs != n_R) { 1885 for (i=0;i<n_vertices;i++) { 1886 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1887 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1888 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1889 } 1890 } else { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1895 } 1896 } 1897 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1898 } 1899 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1900 /* need to correct the solution */ 1901 if (need_benign_correction) { 1902 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1903 PetscScalar *marr; 1904 1905 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1906 if (lda_rhs != n_R) { 1907 for (i=0;i<n_vertices;i++) { 1908 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1909 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1910 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1911 } 1912 } else { 1913 for (i=0;i<n_vertices;i++) { 1914 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1915 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1917 } 1918 } 1919 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1920 } 1921 } else { 1922 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1923 for (i=0;i<n_vertices;i++) { 1924 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1925 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1926 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1928 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1929 } 1930 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1931 } 1932 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1933 /* S_VV and S_CV */ 1934 if (n_constraints) { 1935 Mat B; 1936 1937 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1938 for (i=0;i<n_vertices;i++) { 1939 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1940 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1941 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1942 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1943 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1944 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1945 } 1946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1947 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1948 ierr = MatDestroy(&B);CHKERRQ(ierr); 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1950 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1951 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1953 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 } 1956 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1957 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1958 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1959 } 1960 if (lda_rhs != n_R) { 1961 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1963 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1964 } 1965 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1966 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1967 if (need_benign_correction) { 1968 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1969 PetscScalar *marr,*sums; 1970 1971 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1972 ierr = MatDenseGetArray(S_VVt,&marr); 1973 for (i=0;i<reuse_solver->benign_n;i++) { 1974 const PetscScalar *vals; 1975 const PetscInt *idxs,*idxs_zero; 1976 PetscInt n,j,nz; 1977 1978 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1979 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1980 for (j=0;j<n_vertices;j++) { 1981 PetscInt k; 1982 sums[j] = 0.; 1983 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1984 } 1985 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1986 for (j=0;j<n;j++) { 1987 PetscScalar val = vals[j]; 1988 PetscInt k; 1989 for (k=0;k<n_vertices;k++) { 1990 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1991 } 1992 } 1993 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1994 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1995 } 1996 ierr = PetscFree(sums);CHKERRQ(ierr); 1997 ierr = MatDenseRestoreArray(S_VVt,&marr); 1998 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1999 } 2000 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2001 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2002 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2003 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2004 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2005 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2006 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2007 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2008 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2009 } else { 2010 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2011 } 2012 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2013 2014 /* coarse basis functions */ 2015 for (i=0;i<n_vertices;i++) { 2016 PetscScalar *y; 2017 2018 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2019 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2020 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 y[n_B*i+idx_V_B[i]] = 1.0; 2024 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2025 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2026 2027 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2028 PetscInt j; 2029 2030 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2031 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2032 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2035 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2036 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2037 } 2038 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2039 } 2040 /* if n_R == 0 the object is not destroyed */ 2041 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2042 } 2043 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2044 2045 if (n_constraints) { 2046 Mat B; 2047 2048 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2049 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2050 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2051 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2052 if (n_vertices) { 2053 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2054 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2055 } else { 2056 Mat S_VCt; 2057 2058 if (lda_rhs != n_R) { 2059 ierr = MatDestroy(&B);CHKERRQ(ierr); 2060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2061 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2062 } 2063 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2064 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2065 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2066 } 2067 } 2068 ierr = MatDestroy(&B);CHKERRQ(ierr); 2069 /* coarse basis functions */ 2070 for (i=0;i<n_constraints;i++) { 2071 PetscScalar *y; 2072 2073 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2074 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2075 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2076 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2077 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2078 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2079 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2081 PetscInt j; 2082 2083 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2084 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2085 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2087 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2088 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2090 } 2091 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2092 } 2093 } 2094 if (n_constraints) { 2095 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2096 } 2097 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2098 2099 /* coarse matrix entries relative to B_0 */ 2100 if (pcbddc->benign_n) { 2101 Mat B0_B,B0_BPHI; 2102 IS is_dummy; 2103 PetscScalar *data; 2104 PetscInt j; 2105 2106 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2107 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2109 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2110 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2111 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2112 for (j=0;j<pcbddc->benign_n;j++) { 2113 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2114 for (i=0;i<pcbddc->local_primal_size;i++) { 2115 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2116 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2117 } 2118 } 2119 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2120 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2121 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2122 } 2123 2124 /* compute other basis functions for non-symmetric problems */ 2125 if (!pcbddc->symmetric_primal) { 2126 Mat B_V=NULL,B_C=NULL; 2127 PetscScalar *marray; 2128 2129 if (n_constraints) { 2130 Mat S_CCT,C_CRT; 2131 2132 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2133 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2134 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2135 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2136 if (n_vertices) { 2137 Mat S_VCT; 2138 2139 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2140 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2141 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2142 } 2143 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2144 } 2145 if (n_vertices && n_R) { 2146 PetscScalar *av,*marray; 2147 const PetscInt *xadj,*adjncy; 2148 PetscInt n; 2149 PetscBool flg_row; 2150 2151 /* B_V = B_V - A_VR^T */ 2152 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2153 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2154 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2155 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2156 for (i=0;i<n;i++) { 2157 PetscInt j; 2158 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2159 } 2160 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2161 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2162 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2163 } 2164 2165 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2166 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2167 for (i=0;i<n_vertices;i++) { 2168 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2169 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2170 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2171 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2172 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2173 } 2174 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2175 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2176 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2177 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2178 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2179 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2180 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2181 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2182 } 2183 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2184 /* coarse basis functions */ 2185 for (i=0;i<pcbddc->local_primal_size;i++) { 2186 PetscScalar *y; 2187 2188 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2189 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2190 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2191 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2192 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2193 if (i<n_vertices) { 2194 y[n_B*i+idx_V_B[i]] = 1.0; 2195 } 2196 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2197 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2198 2199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2200 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2201 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2202 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2203 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2204 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2205 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2206 } 2207 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2208 } 2209 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2210 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2211 } 2212 /* free memory */ 2213 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2214 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2215 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2216 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2218 ierr = PetscFree(work);CHKERRQ(ierr); 2219 if (n_vertices) { 2220 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2221 } 2222 if (n_constraints) { 2223 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2224 } 2225 /* Checking coarse_sub_mat and coarse basis functios */ 2226 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2227 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2228 if (pcbddc->dbg_flag) { 2229 Mat coarse_sub_mat; 2230 Mat AUXMAT,TM1,TM2,TM3,TM4; 2231 Mat coarse_phi_D,coarse_phi_B; 2232 Mat coarse_psi_D,coarse_psi_B; 2233 Mat A_II,A_BB,A_IB,A_BI; 2234 Mat C_B,CPHI; 2235 IS is_dummy; 2236 Vec mones; 2237 MatType checkmattype=MATSEQAIJ; 2238 PetscReal real_value; 2239 2240 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2241 Mat A; 2242 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2243 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2244 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2245 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2247 ierr = MatDestroy(&A);CHKERRQ(ierr); 2248 } else { 2249 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2250 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2251 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2252 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2253 } 2254 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2255 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2256 if (!pcbddc->symmetric_primal) { 2257 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2258 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2259 } 2260 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2261 2262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2265 if (!pcbddc->symmetric_primal) { 2266 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2267 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2268 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2269 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2271 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 } else { 2279 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2280 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2281 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2283 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2284 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2285 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2286 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2287 } 2288 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2289 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2290 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2291 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2292 if (pcbddc->benign_n) { 2293 Mat B0_B,B0_BPHI; 2294 PetscScalar *data,*data2; 2295 PetscInt j; 2296 2297 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2299 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2300 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2301 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2303 for (j=0;j<pcbddc->benign_n;j++) { 2304 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2305 for (i=0;i<pcbddc->local_primal_size;i++) { 2306 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2307 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2308 } 2309 } 2310 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2311 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2313 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2314 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2315 } 2316 #if 0 2317 { 2318 PetscViewer viewer; 2319 char filename[256]; 2320 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2321 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2322 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2323 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2324 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2325 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2326 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2327 if (save_change) { 2328 Mat phi_B; 2329 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2330 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2331 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2332 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2333 } else { 2334 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2335 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2336 } 2337 if (pcbddc->coarse_phi_D) { 2338 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2339 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2340 } 2341 if (pcbddc->coarse_psi_B) { 2342 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2343 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2344 } 2345 if (pcbddc->coarse_psi_D) { 2346 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2347 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2348 } 2349 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2350 } 2351 #endif 2352 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2353 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2354 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2356 2357 /* check constraints */ 2358 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2359 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2360 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2361 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2362 } else { 2363 PetscScalar *data; 2364 Mat tmat; 2365 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2366 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2367 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2368 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2369 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2370 } 2371 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2372 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2373 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2374 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2376 if (!pcbddc->symmetric_primal) { 2377 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2378 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2379 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2380 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2382 } 2383 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2384 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2385 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2386 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2388 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2389 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2390 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2391 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2392 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2393 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2394 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2395 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2396 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2397 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2398 if (!pcbddc->symmetric_primal) { 2399 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2400 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2401 } 2402 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2403 } 2404 /* get back data */ 2405 *coarse_submat_vals_n = coarse_submat_vals; 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2411 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2412 { 2413 Mat *work_mat; 2414 IS isrow_s,iscol_s; 2415 PetscBool rsorted,csorted; 2416 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2421 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2422 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2423 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2424 2425 if (!rsorted) { 2426 const PetscInt *idxs; 2427 PetscInt *idxs_sorted,i; 2428 2429 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2430 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2431 for (i=0;i<rsize;i++) { 2432 idxs_perm_r[i] = i; 2433 } 2434 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2435 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2436 for (i=0;i<rsize;i++) { 2437 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2438 } 2439 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2440 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2441 } else { 2442 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2443 isrow_s = isrow; 2444 } 2445 2446 if (!csorted) { 2447 if (isrow == iscol) { 2448 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2449 iscol_s = isrow_s; 2450 } else { 2451 const PetscInt *idxs; 2452 PetscInt *idxs_sorted,i; 2453 2454 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2455 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2456 for (i=0;i<csize;i++) { 2457 idxs_perm_c[i] = i; 2458 } 2459 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2460 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2461 for (i=0;i<csize;i++) { 2462 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2463 } 2464 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2465 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2466 } 2467 } else { 2468 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2469 iscol_s = iscol; 2470 } 2471 2472 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2473 2474 if (!rsorted || !csorted) { 2475 Mat new_mat; 2476 IS is_perm_r,is_perm_c; 2477 2478 if (!rsorted) { 2479 PetscInt *idxs_r,i; 2480 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2481 for (i=0;i<rsize;i++) { 2482 idxs_r[idxs_perm_r[i]] = i; 2483 } 2484 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2485 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2486 } else { 2487 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2488 } 2489 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2490 2491 if (!csorted) { 2492 if (isrow_s == iscol_s) { 2493 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2494 is_perm_c = is_perm_r; 2495 } else { 2496 PetscInt *idxs_c,i; 2497 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2498 for (i=0;i<csize;i++) { 2499 idxs_c[idxs_perm_c[i]] = i; 2500 } 2501 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2502 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2503 } 2504 } else { 2505 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2506 } 2507 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2508 2509 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2510 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2511 work_mat[0] = new_mat; 2512 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2513 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2514 } 2515 2516 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2517 *B = work_mat[0]; 2518 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2519 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2520 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2526 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2527 { 2528 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2529 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2530 Mat new_mat; 2531 IS is_local,is_global; 2532 PetscInt local_size; 2533 PetscBool isseqaij; 2534 PetscErrorCode ierr; 2535 2536 PetscFunctionBegin; 2537 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2538 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2539 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2540 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2541 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2542 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2543 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2544 2545 /* check */ 2546 if (pcbddc->dbg_flag) { 2547 Vec x,x_change; 2548 PetscReal error; 2549 2550 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2551 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2552 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2553 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2554 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2555 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2556 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2557 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2558 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2559 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2560 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2561 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2562 ierr = VecDestroy(&x);CHKERRQ(ierr); 2563 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2564 } 2565 2566 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2567 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2568 if (isseqaij) { 2569 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2570 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2571 } else { 2572 Mat work_mat; 2573 2574 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2575 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2576 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2577 } 2578 if (matis->A->symmetric_set) { 2579 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2580 #if !defined(PETSC_USE_COMPLEX) 2581 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2582 #endif 2583 } 2584 /* 2585 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2586 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2587 */ 2588 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2594 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2595 { 2596 PC_IS* pcis = (PC_IS*)(pc->data); 2597 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2598 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2599 PetscInt *idx_R_local=NULL; 2600 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2601 PetscInt vbs,bs; 2602 PetscBT bitmask=NULL; 2603 PetscErrorCode ierr; 2604 2605 PetscFunctionBegin; 2606 /* 2607 No need to setup local scatters if 2608 - primal space is unchanged 2609 AND 2610 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2611 AND 2612 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2613 */ 2614 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2615 PetscFunctionReturn(0); 2616 } 2617 /* destroy old objects */ 2618 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2619 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2620 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2621 /* Set Non-overlapping dimensions */ 2622 n_B = pcis->n_B; 2623 n_D = pcis->n - n_B; 2624 n_vertices = pcbddc->n_vertices; 2625 2626 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2627 2628 /* create auxiliary bitmask and allocate workspace */ 2629 if (!sub_schurs->reuse_solver) { 2630 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2631 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2632 for (i=0;i<n_vertices;i++) { 2633 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2634 } 2635 2636 for (i=0, n_R=0; i<pcis->n; i++) { 2637 if (!PetscBTLookup(bitmask,i)) { 2638 idx_R_local[n_R++] = i; 2639 } 2640 } 2641 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2642 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2643 2644 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2645 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2646 } 2647 2648 /* Block code */ 2649 vbs = 1; 2650 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2651 if (bs>1 && !(n_vertices%bs)) { 2652 PetscBool is_blocked = PETSC_TRUE; 2653 PetscInt *vary; 2654 if (!sub_schurs->reuse_solver) { 2655 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2656 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2657 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2658 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2659 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2660 for (i=0; i<pcis->n/bs; i++) { 2661 if (vary[i]!=0 && vary[i]!=bs) { 2662 is_blocked = PETSC_FALSE; 2663 break; 2664 } 2665 } 2666 ierr = PetscFree(vary);CHKERRQ(ierr); 2667 } else { 2668 /* Verify directly the R set */ 2669 for (i=0; i<n_R/bs; i++) { 2670 PetscInt j,node=idx_R_local[bs*i]; 2671 for (j=1; j<bs; j++) { 2672 if (node != idx_R_local[bs*i+j]-j) { 2673 is_blocked = PETSC_FALSE; 2674 break; 2675 } 2676 } 2677 } 2678 } 2679 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2680 vbs = bs; 2681 for (i=0;i<n_R/vbs;i++) { 2682 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2683 } 2684 } 2685 } 2686 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2687 if (sub_schurs->reuse_solver) { 2688 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2689 2690 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2691 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2692 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2693 reuse_solver->is_R = pcbddc->is_R_local; 2694 } else { 2695 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2696 } 2697 2698 /* print some info if requested */ 2699 if (pcbddc->dbg_flag) { 2700 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2707 } 2708 2709 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2710 if (!sub_schurs->reuse_solver) { 2711 IS is_aux1,is_aux2; 2712 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2713 2714 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2715 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2716 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2717 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2718 for (i=0; i<n_D; i++) { 2719 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2720 } 2721 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2722 for (i=0, j=0; i<n_R; i++) { 2723 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2724 aux_array1[j++] = i; 2725 } 2726 } 2727 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2728 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2729 for (i=0, j=0; i<n_B; i++) { 2730 if (!PetscBTLookup(bitmask,is_indices[i])) { 2731 aux_array2[j++] = i; 2732 } 2733 } 2734 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2735 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2736 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2737 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2738 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2739 2740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2741 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2742 for (i=0, j=0; i<n_R; i++) { 2743 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2744 aux_array1[j++] = i; 2745 } 2746 } 2747 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2748 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2749 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2750 } 2751 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2752 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2753 } else { 2754 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2755 IS tis; 2756 PetscInt schur_size; 2757 2758 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2759 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2760 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2761 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2763 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2764 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2765 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2766 } 2767 } 2768 PetscFunctionReturn(0); 2769 } 2770 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2774 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2775 { 2776 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2777 PC_IS *pcis = (PC_IS*)pc->data; 2778 PC pc_temp; 2779 Mat A_RR; 2780 MatReuse reuse; 2781 PetscScalar m_one = -1.0; 2782 PetscReal value; 2783 PetscInt n_D,n_R; 2784 PetscBool use_exact,use_exact_reduced,issbaij; 2785 PetscErrorCode ierr; 2786 /* prefixes stuff */ 2787 char dir_prefix[256],neu_prefix[256],str_level[16]; 2788 size_t len; 2789 2790 PetscFunctionBegin; 2791 2792 /* compute prefixes */ 2793 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2794 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2795 if (!pcbddc->current_level) { 2796 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2797 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2798 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2799 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2800 } else { 2801 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2802 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2803 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2804 len -= 15; /* remove "pc_bddc_coarse_" */ 2805 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2806 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2807 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2808 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2809 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2810 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2811 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2812 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2813 } 2814 2815 /* DIRICHLET PROBLEM */ 2816 if (dirichlet) { 2817 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2818 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2819 if (!sub_schurs->reuse_solver) { 2820 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2821 } 2822 if (pcbddc->dbg_flag) { 2823 Mat A_IIn; 2824 2825 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2826 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2827 pcis->A_II = A_IIn; 2828 } 2829 } 2830 if (pcbddc->local_mat->symmetric_set) { 2831 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2832 } 2833 /* Matrix for Dirichlet problem is pcis->A_II */ 2834 n_D = pcis->n - pcis->n_B; 2835 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2836 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2837 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2838 /* default */ 2839 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2840 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2841 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2842 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2843 if (issbaij) { 2844 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2845 } else { 2846 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2847 } 2848 /* Allow user's customization */ 2849 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2850 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2851 } 2852 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2853 if (sub_schurs->reuse_solver) { 2854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2855 2856 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2857 } 2858 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2859 if (!n_D) { 2860 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2861 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2862 } 2863 /* Set Up KSP for Dirichlet problem of BDDC */ 2864 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2865 /* set ksp_D into pcis data */ 2866 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2867 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2868 pcis->ksp_D = pcbddc->ksp_D; 2869 } 2870 2871 /* NEUMANN PROBLEM */ 2872 A_RR = 0; 2873 if (neumann) { 2874 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2875 PetscInt ibs,mbs; 2876 PetscBool issbaij; 2877 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2878 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2879 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2880 if (pcbddc->ksp_R) { /* already created ksp */ 2881 PetscInt nn_R; 2882 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2883 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2884 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2885 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2886 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2887 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2888 reuse = MAT_INITIAL_MATRIX; 2889 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2890 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2891 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2892 reuse = MAT_INITIAL_MATRIX; 2893 } else { /* safe to reuse the matrix */ 2894 reuse = MAT_REUSE_MATRIX; 2895 } 2896 } 2897 /* last check */ 2898 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2899 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2900 reuse = MAT_INITIAL_MATRIX; 2901 } 2902 } else { /* first time, so we need to create the matrix */ 2903 reuse = MAT_INITIAL_MATRIX; 2904 } 2905 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2906 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2907 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2908 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2909 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2910 if (matis->A == pcbddc->local_mat) { 2911 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2912 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2913 } else { 2914 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2915 } 2916 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2917 if (matis->A == pcbddc->local_mat) { 2918 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2919 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2920 } else { 2921 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2922 } 2923 } 2924 /* extract A_RR */ 2925 if (sub_schurs->reuse_solver) { 2926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2927 2928 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2929 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2930 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2931 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2932 } else { 2933 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2934 } 2935 } else { 2936 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2937 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2938 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2939 } 2940 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2941 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2942 } 2943 if (pcbddc->local_mat->symmetric_set) { 2944 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2945 } 2946 if (!pcbddc->ksp_R) { /* create object if not present */ 2947 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2948 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2949 /* default */ 2950 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2951 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2952 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2953 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2954 if (issbaij) { 2955 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2956 } else { 2957 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2958 } 2959 /* Allow user's customization */ 2960 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2961 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2962 } 2963 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2964 if (!n_R) { 2965 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2966 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2967 } 2968 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2969 /* Reuse solver if it is present */ 2970 if (sub_schurs->reuse_solver) { 2971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2972 2973 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2974 } 2975 /* Set Up KSP for Neumann problem of BDDC */ 2976 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2977 } 2978 2979 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2980 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2981 if (pcbddc->dbg_flag) { 2982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2983 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2985 } 2986 if (dirichlet) { /* Dirichlet */ 2987 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2988 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2989 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2990 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2991 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2992 /* need to be adapted? */ 2993 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2994 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2995 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2996 /* print info */ 2997 if (pcbddc->dbg_flag) { 2998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3000 } 3001 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3002 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3003 } 3004 } 3005 if (neumann) { /* Neumann */ 3006 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3007 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3008 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3009 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3010 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3011 /* need to be adapted? */ 3012 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3013 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3014 /* print info */ 3015 if (pcbddc->dbg_flag) { 3016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3017 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3018 } 3019 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3020 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3021 } 3022 } 3023 } 3024 /* free Neumann problem's matrix */ 3025 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3026 PetscFunctionReturn(0); 3027 } 3028 3029 #undef __FUNCT__ 3030 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3031 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3032 { 3033 PetscErrorCode ierr; 3034 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3035 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3036 3037 PetscFunctionBegin; 3038 if (!sub_schurs->reuse_solver) { 3039 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3040 } 3041 if (!pcbddc->switch_static) { 3042 if (applytranspose && pcbddc->local_auxmat1) { 3043 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3044 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3045 } 3046 if (!sub_schurs->reuse_solver) { 3047 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3049 } else { 3050 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3051 3052 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3053 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3054 } 3055 } else { 3056 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3057 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3058 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3060 if (applytranspose && pcbddc->local_auxmat1) { 3061 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3062 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3063 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3064 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3065 } 3066 } 3067 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3068 if (applytranspose) { 3069 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3070 } else { 3071 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3072 } 3073 } else { 3074 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3075 3076 if (applytranspose) { 3077 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3078 } else { 3079 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3080 } 3081 } 3082 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3083 if (!pcbddc->switch_static) { 3084 if (!sub_schurs->reuse_solver) { 3085 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3087 } else { 3088 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3089 3090 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3091 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3092 } 3093 if (!applytranspose && pcbddc->local_auxmat1) { 3094 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3095 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3096 } 3097 } else { 3098 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 if (!applytranspose && pcbddc->local_auxmat1) { 3103 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3104 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3105 } 3106 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3107 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3108 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 } 3111 PetscFunctionReturn(0); 3112 } 3113 3114 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3115 #undef __FUNCT__ 3116 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3117 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3118 { 3119 PetscErrorCode ierr; 3120 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3121 PC_IS* pcis = (PC_IS*) (pc->data); 3122 const PetscScalar zero = 0.0; 3123 3124 PetscFunctionBegin; 3125 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3126 if (!pcbddc->benign_apply_coarse_only) { 3127 if (applytranspose) { 3128 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3129 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3130 } else { 3131 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3132 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3133 } 3134 } else { 3135 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3136 } 3137 3138 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3139 if (pcbddc->benign_n) { 3140 PetscScalar *array; 3141 PetscInt j; 3142 3143 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3144 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3145 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3146 } 3147 3148 /* start communications from local primal nodes to rhs of coarse solver */ 3149 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3150 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3152 3153 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3154 /* TODO remove null space when doing multilevel */ 3155 if (pcbddc->coarse_ksp) { 3156 Mat coarse_mat; 3157 Vec rhs,sol; 3158 MatNullSpace nullsp; 3159 3160 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3161 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3162 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3163 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3164 if (nullsp) { 3165 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3166 } 3167 if (applytranspose) { 3168 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3169 } else { 3170 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3171 } 3172 if (nullsp) { 3173 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3174 } 3175 } 3176 3177 /* Local solution on R nodes */ 3178 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3179 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3180 } 3181 /* communications from coarse sol to local primal nodes */ 3182 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3183 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3184 3185 /* Sum contributions from the two levels */ 3186 if (!pcbddc->benign_apply_coarse_only) { 3187 if (applytranspose) { 3188 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3189 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3190 } else { 3191 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3192 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3193 } 3194 /* store p0 */ 3195 if (pcbddc->benign_n) { 3196 PetscScalar *array; 3197 PetscInt j; 3198 3199 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3200 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3201 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3202 } 3203 } else { /* expand the coarse solution */ 3204 if (applytranspose) { 3205 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3206 } else { 3207 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3208 } 3209 } 3210 PetscFunctionReturn(0); 3211 } 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3215 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3216 { 3217 PetscErrorCode ierr; 3218 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3219 PetscScalar *array; 3220 Vec from,to; 3221 3222 PetscFunctionBegin; 3223 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3224 from = pcbddc->coarse_vec; 3225 to = pcbddc->vec1_P; 3226 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3227 Vec tvec; 3228 3229 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3230 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3231 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3232 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3233 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3234 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3235 } 3236 } else { /* from local to global -> put data in coarse right hand side */ 3237 from = pcbddc->vec1_P; 3238 to = pcbddc->coarse_vec; 3239 } 3240 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3246 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3247 { 3248 PetscErrorCode ierr; 3249 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3250 PetscScalar *array; 3251 Vec from,to; 3252 3253 PetscFunctionBegin; 3254 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3255 from = pcbddc->coarse_vec; 3256 to = pcbddc->vec1_P; 3257 } else { /* from local to global -> put data in coarse right hand side */ 3258 from = pcbddc->vec1_P; 3259 to = pcbddc->coarse_vec; 3260 } 3261 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3262 if (smode == SCATTER_FORWARD) { 3263 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3264 Vec tvec; 3265 3266 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3267 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3268 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3269 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3270 } 3271 } else { 3272 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3273 ierr = VecResetArray(from);CHKERRQ(ierr); 3274 } 3275 } 3276 PetscFunctionReturn(0); 3277 } 3278 3279 /* uncomment for testing purposes */ 3280 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3283 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3284 { 3285 PetscErrorCode ierr; 3286 PC_IS* pcis = (PC_IS*)(pc->data); 3287 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3288 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3289 /* one and zero */ 3290 PetscScalar one=1.0,zero=0.0; 3291 /* space to store constraints and their local indices */ 3292 PetscScalar *constraints_data; 3293 PetscInt *constraints_idxs,*constraints_idxs_B; 3294 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3295 PetscInt *constraints_n; 3296 /* iterators */ 3297 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3298 /* BLAS integers */ 3299 PetscBLASInt lwork,lierr; 3300 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3301 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3302 /* reuse */ 3303 PetscInt olocal_primal_size,olocal_primal_size_cc; 3304 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3305 /* change of basis */ 3306 PetscBool qr_needed; 3307 PetscBT change_basis,qr_needed_idx; 3308 /* auxiliary stuff */ 3309 PetscInt *nnz,*is_indices; 3310 PetscInt ncc; 3311 /* some quantities */ 3312 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3313 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3314 3315 PetscFunctionBegin; 3316 /* Destroy Mat objects computed previously */ 3317 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3318 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3319 /* save info on constraints from previous setup (if any) */ 3320 olocal_primal_size = pcbddc->local_primal_size; 3321 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3322 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3323 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3324 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3325 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3326 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3327 3328 /* print some info */ 3329 if (pcbddc->dbg_flag) { 3330 IS vertices; 3331 PetscInt nv,nedges,nfaces; 3332 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3333 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3334 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3335 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3340 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3341 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3342 } 3343 3344 if (!pcbddc->adaptive_selection) { 3345 IS ISForVertices,*ISForFaces,*ISForEdges; 3346 MatNullSpace nearnullsp; 3347 const Vec *nearnullvecs; 3348 Vec *localnearnullsp; 3349 PetscScalar *array; 3350 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3351 PetscBool nnsp_has_cnst; 3352 /* LAPACK working arrays for SVD or POD */ 3353 PetscBool skip_lapack,boolforchange; 3354 PetscScalar *work; 3355 PetscReal *singular_vals; 3356 #if defined(PETSC_USE_COMPLEX) 3357 PetscReal *rwork; 3358 #endif 3359 #if defined(PETSC_MISSING_LAPACK_GESVD) 3360 PetscScalar *temp_basis,*correlation_mat; 3361 #else 3362 PetscBLASInt dummy_int=1; 3363 PetscScalar dummy_scalar=1.; 3364 #endif 3365 3366 /* Get index sets for faces, edges and vertices from graph */ 3367 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3368 /* free unneeded index sets */ 3369 if (!pcbddc->use_vertices) { 3370 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3371 } 3372 if (!pcbddc->use_edges) { 3373 for (i=0;i<n_ISForEdges;i++) { 3374 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3375 } 3376 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3377 n_ISForEdges = 0; 3378 } 3379 if (!pcbddc->use_faces) { 3380 for (i=0;i<n_ISForFaces;i++) { 3381 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3382 } 3383 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3384 n_ISForFaces = 0; 3385 } 3386 3387 #if defined(PETSC_USE_DEBUG) 3388 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3389 Also use_change_of_basis should be consistent among processors */ 3390 if (pcbddc->NullSpace) { 3391 PetscBool tbool[2],gbool[2]; 3392 3393 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3394 pcbddc->use_change_of_basis = PETSC_TRUE; 3395 if (!ISForEdges) { 3396 pcbddc->use_change_on_faces = PETSC_TRUE; 3397 } 3398 } 3399 tbool[0] = pcbddc->use_change_of_basis; 3400 tbool[1] = pcbddc->use_change_on_faces; 3401 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3402 pcbddc->use_change_of_basis = gbool[0]; 3403 pcbddc->use_change_on_faces = gbool[1]; 3404 } 3405 #endif 3406 3407 /* check if near null space is attached to global mat */ 3408 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3409 if (nearnullsp) { 3410 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3411 /* remove any stored info */ 3412 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3413 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3414 /* store information for BDDC solver reuse */ 3415 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3416 pcbddc->onearnullspace = nearnullsp; 3417 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3418 for (i=0;i<nnsp_size;i++) { 3419 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3420 } 3421 } else { /* if near null space is not provided BDDC uses constants by default */ 3422 nnsp_size = 0; 3423 nnsp_has_cnst = PETSC_TRUE; 3424 } 3425 /* get max number of constraints on a single cc */ 3426 max_constraints = nnsp_size; 3427 if (nnsp_has_cnst) max_constraints++; 3428 3429 /* 3430 Evaluate maximum storage size needed by the procedure 3431 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3432 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3433 There can be multiple constraints per connected component 3434 */ 3435 n_vertices = 0; 3436 if (ISForVertices) { 3437 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3438 } 3439 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3440 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3441 3442 total_counts = n_ISForFaces+n_ISForEdges; 3443 total_counts *= max_constraints; 3444 total_counts += n_vertices; 3445 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3446 3447 total_counts = 0; 3448 max_size_of_constraint = 0; 3449 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3450 IS used_is; 3451 if (i<n_ISForEdges) { 3452 used_is = ISForEdges[i]; 3453 } else { 3454 used_is = ISForFaces[i-n_ISForEdges]; 3455 } 3456 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3457 total_counts += j; 3458 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3459 } 3460 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3461 3462 /* get local part of global near null space vectors */ 3463 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3464 for (k=0;k<nnsp_size;k++) { 3465 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3466 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3468 } 3469 3470 /* whether or not to skip lapack calls */ 3471 skip_lapack = PETSC_TRUE; 3472 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3473 3474 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3475 if (!skip_lapack) { 3476 PetscScalar temp_work; 3477 3478 #if defined(PETSC_MISSING_LAPACK_GESVD) 3479 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3480 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3482 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3483 #if defined(PETSC_USE_COMPLEX) 3484 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3485 #endif 3486 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3487 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3488 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3489 lwork = -1; 3490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3491 #if !defined(PETSC_USE_COMPLEX) 3492 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3493 #else 3494 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3495 #endif 3496 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3498 #else /* on missing GESVD */ 3499 /* SVD */ 3500 PetscInt max_n,min_n; 3501 max_n = max_size_of_constraint; 3502 min_n = max_constraints; 3503 if (max_size_of_constraint < max_constraints) { 3504 min_n = max_size_of_constraint; 3505 max_n = max_constraints; 3506 } 3507 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3508 #if defined(PETSC_USE_COMPLEX) 3509 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3510 #endif 3511 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3512 lwork = -1; 3513 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3517 #if !defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3521 #endif 3522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3523 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3524 #endif /* on missing GESVD */ 3525 /* Allocate optimal workspace */ 3526 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3527 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3528 } 3529 /* Now we can loop on constraining sets */ 3530 total_counts = 0; 3531 constraints_idxs_ptr[0] = 0; 3532 constraints_data_ptr[0] = 0; 3533 /* vertices */ 3534 if (n_vertices) { 3535 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3536 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3537 for (i=0;i<n_vertices;i++) { 3538 constraints_n[total_counts] = 1; 3539 constraints_data[total_counts] = 1.0; 3540 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3541 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3542 total_counts++; 3543 } 3544 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3545 n_vertices = total_counts; 3546 } 3547 3548 /* edges and faces */ 3549 total_counts_cc = total_counts; 3550 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3551 IS used_is; 3552 PetscBool idxs_copied = PETSC_FALSE; 3553 3554 if (ncc<n_ISForEdges) { 3555 used_is = ISForEdges[ncc]; 3556 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3557 } else { 3558 used_is = ISForFaces[ncc-n_ISForEdges]; 3559 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3560 } 3561 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3562 3563 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3564 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3565 /* change of basis should not be performed on local periodic nodes */ 3566 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3567 if (nnsp_has_cnst) { 3568 PetscScalar quad_value; 3569 3570 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3571 idxs_copied = PETSC_TRUE; 3572 3573 if (!pcbddc->use_nnsp_true) { 3574 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3575 } else { 3576 quad_value = 1.0; 3577 } 3578 for (j=0;j<size_of_constraint;j++) { 3579 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3580 } 3581 temp_constraints++; 3582 total_counts++; 3583 } 3584 for (k=0;k<nnsp_size;k++) { 3585 PetscReal real_value; 3586 PetscScalar *ptr_to_data; 3587 3588 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3589 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3590 for (j=0;j<size_of_constraint;j++) { 3591 ptr_to_data[j] = array[is_indices[j]]; 3592 } 3593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3594 /* check if array is null on the connected component */ 3595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3596 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3597 if (real_value > 0.0) { /* keep indices and values */ 3598 temp_constraints++; 3599 total_counts++; 3600 if (!idxs_copied) { 3601 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3602 idxs_copied = PETSC_TRUE; 3603 } 3604 } 3605 } 3606 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3607 valid_constraints = temp_constraints; 3608 if (!pcbddc->use_nnsp_true && temp_constraints) { 3609 if (temp_constraints == 1) { /* just normalize the constraint */ 3610 PetscScalar norm,*ptr_to_data; 3611 3612 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3614 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3615 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3616 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3617 } else { /* perform SVD */ 3618 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3619 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3620 3621 #if defined(PETSC_MISSING_LAPACK_GESVD) 3622 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3623 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3624 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3625 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3626 from that computed using LAPACKgesvd 3627 -> This is due to a different computation of eigenvectors in LAPACKheev 3628 -> The quality of the POD-computed basis will be the same */ 3629 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3630 /* Store upper triangular part of correlation matrix */ 3631 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3632 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3633 for (j=0;j<temp_constraints;j++) { 3634 for (k=0;k<j+1;k++) { 3635 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3636 } 3637 } 3638 /* compute eigenvalues and eigenvectors of correlation matrix */ 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3641 #if !defined(PETSC_USE_COMPLEX) 3642 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3643 #else 3644 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3645 #endif 3646 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3647 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3648 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3649 j = 0; 3650 while (j < temp_constraints && singular_vals[j] < tol) j++; 3651 total_counts = total_counts-j; 3652 valid_constraints = temp_constraints-j; 3653 /* scale and copy POD basis into used quadrature memory */ 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3655 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3656 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3660 if (j<temp_constraints) { 3661 PetscInt ii; 3662 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3663 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3664 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3665 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3666 for (k=0;k<temp_constraints-j;k++) { 3667 for (ii=0;ii<size_of_constraint;ii++) { 3668 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3669 } 3670 } 3671 } 3672 #else /* on missing GESVD */ 3673 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3674 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3675 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3677 #if !defined(PETSC_USE_COMPLEX) 3678 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3679 #else 3680 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3681 #endif 3682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3683 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3684 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3685 k = temp_constraints; 3686 if (k > size_of_constraint) k = size_of_constraint; 3687 j = 0; 3688 while (j < k && singular_vals[k-j-1] < tol) j++; 3689 valid_constraints = k-j; 3690 total_counts = total_counts-temp_constraints+valid_constraints; 3691 #endif /* on missing GESVD */ 3692 } 3693 } 3694 /* update pointers information */ 3695 if (valid_constraints) { 3696 constraints_n[total_counts_cc] = valid_constraints; 3697 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3698 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3699 /* set change_of_basis flag */ 3700 if (boolforchange) { 3701 PetscBTSet(change_basis,total_counts_cc); 3702 } 3703 total_counts_cc++; 3704 } 3705 } 3706 /* free workspace */ 3707 if (!skip_lapack) { 3708 ierr = PetscFree(work);CHKERRQ(ierr); 3709 #if defined(PETSC_USE_COMPLEX) 3710 ierr = PetscFree(rwork);CHKERRQ(ierr); 3711 #endif 3712 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3713 #if defined(PETSC_MISSING_LAPACK_GESVD) 3714 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3715 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3716 #endif 3717 } 3718 for (k=0;k<nnsp_size;k++) { 3719 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3720 } 3721 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3722 /* free index sets of faces, edges and vertices */ 3723 for (i=0;i<n_ISForFaces;i++) { 3724 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3725 } 3726 if (n_ISForFaces) { 3727 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3728 } 3729 for (i=0;i<n_ISForEdges;i++) { 3730 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3731 } 3732 if (n_ISForEdges) { 3733 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3734 } 3735 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3736 } else { 3737 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3738 3739 total_counts = 0; 3740 n_vertices = 0; 3741 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3742 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3743 } 3744 max_constraints = 0; 3745 total_counts_cc = 0; 3746 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3747 total_counts += pcbddc->adaptive_constraints_n[i]; 3748 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3749 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3750 } 3751 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3752 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3753 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3754 constraints_data = pcbddc->adaptive_constraints_data; 3755 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3756 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3757 total_counts_cc = 0; 3758 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3759 if (pcbddc->adaptive_constraints_n[i]) { 3760 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3761 } 3762 } 3763 #if 0 3764 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3765 for (i=0;i<total_counts_cc;i++) { 3766 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3767 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3768 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3769 printf(" %d",constraints_idxs[j]); 3770 } 3771 printf("\n"); 3772 printf("number of cc: %d\n",constraints_n[i]); 3773 } 3774 for (i=0;i<n_vertices;i++) { 3775 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3776 } 3777 for (i=0;i<sub_schurs->n_subs;i++) { 3778 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3779 } 3780 #endif 3781 3782 max_size_of_constraint = 0; 3783 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3784 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3785 /* Change of basis */ 3786 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3787 if (pcbddc->use_change_of_basis) { 3788 for (i=0;i<sub_schurs->n_subs;i++) { 3789 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3790 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3791 } 3792 } 3793 } 3794 } 3795 pcbddc->local_primal_size = total_counts; 3796 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3797 3798 /* map constraints_idxs in boundary numbering */ 3799 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3800 if (i != constraints_idxs_ptr[total_counts_cc]) { 3801 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3802 } 3803 3804 /* Create constraint matrix */ 3805 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3806 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3807 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3808 3809 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3810 /* determine if a QR strategy is needed for change of basis */ 3811 qr_needed = PETSC_FALSE; 3812 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3813 total_primal_vertices=0; 3814 pcbddc->local_primal_size_cc = 0; 3815 for (i=0;i<total_counts_cc;i++) { 3816 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3817 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3818 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3819 pcbddc->local_primal_size_cc += 1; 3820 } else if (PetscBTLookup(change_basis,i)) { 3821 for (k=0;k<constraints_n[i];k++) { 3822 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3823 } 3824 pcbddc->local_primal_size_cc += constraints_n[i]; 3825 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3826 PetscBTSet(qr_needed_idx,i); 3827 qr_needed = PETSC_TRUE; 3828 } 3829 } else { 3830 pcbddc->local_primal_size_cc += 1; 3831 } 3832 } 3833 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3834 pcbddc->n_vertices = total_primal_vertices; 3835 /* permute indices in order to have a sorted set of vertices */ 3836 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3837 3838 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3839 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3840 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3841 3842 /* nonzero structure of constraint matrix */ 3843 /* and get reference dof for local constraints */ 3844 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3845 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3846 3847 j = total_primal_vertices; 3848 total_counts = total_primal_vertices; 3849 cum = total_primal_vertices; 3850 for (i=n_vertices;i<total_counts_cc;i++) { 3851 if (!PetscBTLookup(change_basis,i)) { 3852 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3853 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3854 cum++; 3855 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3856 for (k=0;k<constraints_n[i];k++) { 3857 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3858 nnz[j+k] = size_of_constraint; 3859 } 3860 j += constraints_n[i]; 3861 } 3862 } 3863 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3864 ierr = PetscFree(nnz);CHKERRQ(ierr); 3865 3866 /* set values in constraint matrix */ 3867 for (i=0;i<total_primal_vertices;i++) { 3868 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3869 } 3870 total_counts = total_primal_vertices; 3871 for (i=n_vertices;i<total_counts_cc;i++) { 3872 if (!PetscBTLookup(change_basis,i)) { 3873 PetscInt *cols; 3874 3875 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3876 cols = constraints_idxs+constraints_idxs_ptr[i]; 3877 for (k=0;k<constraints_n[i];k++) { 3878 PetscInt row = total_counts+k; 3879 PetscScalar *vals; 3880 3881 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3882 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3883 } 3884 total_counts += constraints_n[i]; 3885 } 3886 } 3887 /* assembling */ 3888 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 3891 /* 3892 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3893 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3894 */ 3895 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3896 if (pcbddc->use_change_of_basis) { 3897 /* dual and primal dofs on a single cc */ 3898 PetscInt dual_dofs,primal_dofs; 3899 /* working stuff for GEQRF */ 3900 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3901 PetscBLASInt lqr_work; 3902 /* working stuff for UNGQR */ 3903 PetscScalar *gqr_work,lgqr_work_t; 3904 PetscBLASInt lgqr_work; 3905 /* working stuff for TRTRS */ 3906 PetscScalar *trs_rhs; 3907 PetscBLASInt Blas_NRHS; 3908 /* pointers for values insertion into change of basis matrix */ 3909 PetscInt *start_rows,*start_cols; 3910 PetscScalar *start_vals; 3911 /* working stuff for values insertion */ 3912 PetscBT is_primal; 3913 PetscInt *aux_primal_numbering_B; 3914 /* matrix sizes */ 3915 PetscInt global_size,local_size; 3916 /* temporary change of basis */ 3917 Mat localChangeOfBasisMatrix; 3918 /* extra space for debugging */ 3919 PetscScalar *dbg_work; 3920 3921 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3922 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3923 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3924 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3925 /* nonzeros for local mat */ 3926 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3927 for (i=0;i<pcis->n;i++) nnz[i]=1; 3928 for (i=n_vertices;i<total_counts_cc;i++) { 3929 if (PetscBTLookup(change_basis,i)) { 3930 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3931 if (PetscBTLookup(qr_needed_idx,i)) { 3932 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3933 } else { 3934 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3935 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3936 } 3937 } 3938 } 3939 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3940 ierr = PetscFree(nnz);CHKERRQ(ierr); 3941 /* Set initial identity in the matrix */ 3942 for (i=0;i<pcis->n;i++) { 3943 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3944 } 3945 3946 if (pcbddc->dbg_flag) { 3947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3949 } 3950 3951 3952 /* Now we loop on the constraints which need a change of basis */ 3953 /* 3954 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3955 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3956 3957 Basic blocks of change of basis matrix T computed by 3958 3959 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3960 3961 | 1 0 ... 0 s_1/S | 3962 | 0 1 ... 0 s_2/S | 3963 | ... | 3964 | 0 ... 1 s_{n-1}/S | 3965 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3966 3967 with S = \sum_{i=1}^n s_i^2 3968 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3969 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3970 3971 - QR decomposition of constraints otherwise 3972 */ 3973 if (qr_needed) { 3974 /* space to store Q */ 3975 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3976 /* first we issue queries for optimal work */ 3977 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 lqr_work = -1; 3981 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3982 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3983 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3984 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3985 lgqr_work = -1; 3986 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3989 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3990 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3991 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3993 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3994 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3995 /* array to store scaling factors for reflectors */ 3996 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3997 /* array to store rhs and solution of triangular solver */ 3998 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3999 /* allocating workspace for check */ 4000 if (pcbddc->dbg_flag) { 4001 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4002 } 4003 } 4004 /* array to store whether a node is primal or not */ 4005 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4006 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4007 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4008 if (i != total_primal_vertices) { 4009 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 4010 } 4011 for (i=0;i<total_primal_vertices;i++) { 4012 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4013 } 4014 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4015 4016 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4017 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4018 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4019 if (PetscBTLookup(change_basis,total_counts)) { 4020 /* get constraint info */ 4021 primal_dofs = constraints_n[total_counts]; 4022 dual_dofs = size_of_constraint-primal_dofs; 4023 4024 if (pcbddc->dbg_flag) { 4025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4026 } 4027 4028 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4029 4030 /* copy quadrature constraints for change of basis check */ 4031 if (pcbddc->dbg_flag) { 4032 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4033 } 4034 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4035 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4036 4037 /* compute QR decomposition of constraints */ 4038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4039 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4041 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4042 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4043 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4044 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4045 4046 /* explictly compute R^-T */ 4047 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4048 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4049 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4050 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4052 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4053 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4054 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4055 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4056 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4057 4058 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4061 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4063 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4064 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4065 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4066 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4067 4068 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4069 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4070 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4072 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4073 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4075 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4077 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4078 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4079 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4080 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4081 4082 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4083 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4084 /* insert cols for primal dofs */ 4085 for (j=0;j<primal_dofs;j++) { 4086 start_vals = &qr_basis[j*size_of_constraint]; 4087 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4088 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4089 } 4090 /* insert cols for dual dofs */ 4091 for (j=0,k=0;j<dual_dofs;k++) { 4092 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4093 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4094 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4095 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4096 j++; 4097 } 4098 } 4099 4100 /* check change of basis */ 4101 if (pcbddc->dbg_flag) { 4102 PetscInt ii,jj; 4103 PetscBool valid_qr=PETSC_TRUE; 4104 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4106 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4109 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4110 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4111 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4112 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4113 for (jj=0;jj<size_of_constraint;jj++) { 4114 for (ii=0;ii<primal_dofs;ii++) { 4115 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4116 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4117 } 4118 } 4119 if (!valid_qr) { 4120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4121 for (jj=0;jj<size_of_constraint;jj++) { 4122 for (ii=0;ii<primal_dofs;ii++) { 4123 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4124 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4125 } 4126 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4127 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4128 } 4129 } 4130 } 4131 } else { 4132 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4133 } 4134 } 4135 } else { /* simple transformation block */ 4136 PetscInt row,col; 4137 PetscScalar val,norm; 4138 4139 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4140 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4141 for (j=0;j<size_of_constraint;j++) { 4142 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4143 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4144 if (!PetscBTLookup(is_primal,row_B)) { 4145 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4146 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4147 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4148 } else { 4149 for (k=0;k<size_of_constraint;k++) { 4150 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4151 if (row != col) { 4152 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4153 } else { 4154 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4155 } 4156 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4157 } 4158 } 4159 } 4160 if (pcbddc->dbg_flag) { 4161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4162 } 4163 } 4164 } else { 4165 if (pcbddc->dbg_flag) { 4166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4167 } 4168 } 4169 } 4170 4171 /* free workspace */ 4172 if (qr_needed) { 4173 if (pcbddc->dbg_flag) { 4174 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4175 } 4176 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4177 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4178 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4179 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4180 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4181 } 4182 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4183 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4184 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4185 4186 /* assembling of global change of variable */ 4187 if (!pcbddc->fake_change) { 4188 Mat tmat; 4189 PetscInt bs; 4190 4191 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4192 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4193 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4194 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4195 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4196 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4197 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4198 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4199 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4200 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4201 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4202 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4203 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4204 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4205 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4206 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4207 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4208 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4209 4210 /* check */ 4211 if (pcbddc->dbg_flag) { 4212 PetscReal error; 4213 Vec x,x_change; 4214 4215 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4216 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4217 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4218 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4219 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4220 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4221 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4222 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4223 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4224 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4225 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4226 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4229 ierr = VecDestroy(&x);CHKERRQ(ierr); 4230 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4231 } 4232 /* adapt sub_schurs computed (if any) */ 4233 if (pcbddc->use_deluxe_scaling) { 4234 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4235 4236 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4237 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4238 } 4239 if (sub_schurs->S_Ej_all) { 4240 Mat S_new,tmat; 4241 IS is_all_N,is_V_Sall = NULL; 4242 4243 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4244 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4245 if (pcbddc->deluxe_zerorows) { 4246 ISLocalToGlobalMapping NtoSall; 4247 IS is_V; 4248 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4249 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4250 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4251 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4252 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4253 } 4254 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4255 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4256 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4257 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4258 if (pcbddc->deluxe_zerorows) { 4259 const PetscScalar *array; 4260 const PetscInt *idxs_V,*idxs_all; 4261 PetscInt i,n_V; 4262 4263 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4264 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4265 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4266 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4267 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4268 for (i=0;i<n_V;i++) { 4269 PetscScalar val; 4270 PetscInt idx; 4271 4272 idx = idxs_V[i]; 4273 val = array[idxs_all[idxs_V[i]]]; 4274 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4275 } 4276 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4277 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4278 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4279 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4280 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4281 } 4282 sub_schurs->S_Ej_all = S_new; 4283 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4284 if (sub_schurs->sum_S_Ej_all) { 4285 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4286 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4287 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4288 if (pcbddc->deluxe_zerorows) { 4289 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4290 } 4291 sub_schurs->sum_S_Ej_all = S_new; 4292 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4293 } 4294 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4295 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4296 } 4297 /* destroy any change of basis context in sub_schurs */ 4298 if (sub_schurs->change) { 4299 PetscInt i; 4300 4301 for (i=0;i<sub_schurs->n_subs;i++) { 4302 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4303 } 4304 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4305 } 4306 } 4307 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4308 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4309 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4310 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4311 pcbddc->use_qr_single = qr_needed; 4312 } 4313 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4314 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4315 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4316 } 4317 4318 /* set up change of basis context */ 4319 if (pcbddc->ChangeOfBasisMatrix) { 4320 PCBDDCChange_ctx change_ctx; 4321 4322 if (!pcbddc->new_global_mat) { 4323 PetscInt global_size,local_size; 4324 4325 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4326 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4327 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4328 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4329 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4330 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4331 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4332 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4333 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4334 } else { 4335 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4336 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4337 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4338 } 4339 if (!pcbddc->user_ChangeOfBasisMatrix) { 4340 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4341 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4342 } else { 4343 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4344 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4345 } 4346 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4347 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4348 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4349 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4350 } else { 4351 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4352 } 4353 4354 if (!pcbddc->fake_change) { 4355 /* add pressure dofs to set of primal nodes for numbering purposes */ 4356 for (i=0;i<pcbddc->benign_n;i++) { 4357 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4358 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4359 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4360 pcbddc->local_primal_size_cc++; 4361 pcbddc->local_primal_size++; 4362 } 4363 4364 /* check if a new primal space has been introduced (also take into account benign trick) */ 4365 pcbddc->new_primal_space_local = PETSC_TRUE; 4366 if (olocal_primal_size == pcbddc->local_primal_size) { 4367 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4368 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4369 if (!pcbddc->new_primal_space_local) { 4370 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4371 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4372 } 4373 } 4374 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4375 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4376 } 4377 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4378 4379 /* flush dbg viewer */ 4380 if (pcbddc->dbg_flag) { 4381 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4382 } 4383 4384 /* free workspace */ 4385 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4386 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4387 if (!pcbddc->adaptive_selection) { 4388 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4389 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4390 } else { 4391 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4392 pcbddc->adaptive_constraints_idxs_ptr, 4393 pcbddc->adaptive_constraints_data_ptr, 4394 pcbddc->adaptive_constraints_idxs, 4395 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4396 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4397 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4398 } 4399 PetscFunctionReturn(0); 4400 } 4401 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4404 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4405 { 4406 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4407 PC_IS *pcis = (PC_IS*)pc->data; 4408 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4409 PetscInt ierr,i,N; 4410 PetscViewer viewer=pcbddc->dbg_viewer; 4411 4412 PetscFunctionBegin; 4413 /* Reset previously computed graph */ 4414 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4415 /* Init local Graph struct */ 4416 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4417 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4418 4419 /* Check validity of the csr graph passed in by the user */ 4420 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4421 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4422 } 4423 4424 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4425 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4426 PetscInt *xadj,*adjncy; 4427 PetscInt nvtxs; 4428 PetscBool flg_row=PETSC_FALSE; 4429 4430 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4431 if (flg_row) { 4432 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4433 pcbddc->computed_rowadj = PETSC_TRUE; 4434 } 4435 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4436 } 4437 if (pcbddc->dbg_flag) { 4438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4439 } 4440 4441 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4442 if (pcbddc->user_provided_isfordofs) { 4443 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4444 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4445 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4446 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4447 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4448 } 4449 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4450 pcbddc->n_ISForDofs = 0; 4451 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4452 } 4453 } else { 4454 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4455 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4456 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4457 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4458 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4459 } 4460 } 4461 } 4462 4463 /* Setup of Graph */ 4464 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4465 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4466 } 4467 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4468 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4469 } 4470 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4471 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4472 } 4473 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4474 4475 /* attach info on disconnected subdomains if present */ 4476 if (pcbddc->n_local_subs) { 4477 PetscInt *local_subs; 4478 4479 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4480 for (i=0;i<pcbddc->n_local_subs;i++) { 4481 const PetscInt *idxs; 4482 PetscInt nl,j; 4483 4484 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4485 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4486 for (j=0;j<nl;j++) { 4487 local_subs[idxs[j]] = i; 4488 } 4489 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4490 } 4491 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4492 pcbddc->mat_graph->local_subs = local_subs; 4493 } 4494 4495 /* Graph's connected components analysis */ 4496 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4497 4498 /* print some info to stdout */ 4499 if (pcbddc->dbg_flag) { 4500 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4501 } 4502 PetscFunctionReturn(0); 4503 } 4504 4505 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCSubsetNumbering" 4508 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4509 { 4510 PetscSF sf; 4511 PetscLayout map; 4512 const PetscInt *idxs; 4513 PetscInt *leaf_data,*root_data,*gidxs; 4514 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4515 PetscInt n_n,nlocals,start,first_index; 4516 PetscMPIInt commsize; 4517 PetscBool first_found; 4518 PetscErrorCode ierr; 4519 4520 PetscFunctionBegin; 4521 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4522 if (subset_mult) { 4523 PetscCheckSameComm(subset,1,subset_mult,2); 4524 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4525 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4526 } 4527 /* create workspace layout for computing global indices of subset */ 4528 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4529 lbounds[0] = lbounds[1] = 0; 4530 for (i=0;i<n;i++) { 4531 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4532 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4533 } 4534 lbounds[0] = -lbounds[0]; 4535 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4536 gbounds[0] = -gbounds[0]; 4537 N = gbounds[1] - gbounds[0] + 1; 4538 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4539 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4540 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4541 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4542 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4543 4544 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4545 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4546 if (subset_mult) { 4547 const PetscInt* idxs_mult; 4548 4549 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4550 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4551 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4552 } else { 4553 for (i=0;i<n;i++) leaf_data[i] = 1; 4554 } 4555 /* local size of new subset */ 4556 n_n = 0; 4557 for (i=0;i<n;i++) n_n += leaf_data[i]; 4558 4559 /* global indexes in layout */ 4560 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4561 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4562 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4564 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4565 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4566 4567 /* reduce from leaves to roots */ 4568 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4569 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4570 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4571 4572 /* count indexes in local part of layout */ 4573 nlocals = 0; 4574 first_index = -1; 4575 first_found = PETSC_FALSE; 4576 for (i=0;i<Nl;i++) { 4577 if (!first_found && root_data[i]) { 4578 first_found = PETSC_TRUE; 4579 first_index = i; 4580 } 4581 nlocals += root_data[i]; 4582 } 4583 4584 /* cumulative of number of indexes and size of subset without holes */ 4585 #if defined(PETSC_HAVE_MPI_EXSCAN) 4586 start = 0; 4587 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4588 #else 4589 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4590 start = start-nlocals; 4591 #endif 4592 4593 if (N_n) { /* compute total size of new subset if requested */ 4594 *N_n = start + nlocals; 4595 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4596 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4597 } 4598 4599 /* adapt root data with cumulative */ 4600 if (first_found) { 4601 PetscInt old_index; 4602 4603 root_data[first_index] += start; 4604 old_index = first_index; 4605 for (i=first_index+1;i<Nl;i++) { 4606 if (root_data[i]) { 4607 root_data[i] += root_data[old_index]; 4608 old_index = i; 4609 } 4610 } 4611 } 4612 4613 /* from roots to leaves */ 4614 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4615 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4616 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4617 4618 /* create new IS with global indexes without holes */ 4619 if (subset_mult) { 4620 const PetscInt* idxs_mult; 4621 PetscInt cum; 4622 4623 cum = 0; 4624 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4625 for (i=0;i<n;i++) { 4626 PetscInt j; 4627 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4628 } 4629 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4630 } else { 4631 for (i=0;i<n;i++) { 4632 gidxs[i] = leaf_data[i]-1; 4633 } 4634 } 4635 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4636 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4642 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4643 { 4644 PetscInt i,j; 4645 PetscScalar *alphas; 4646 PetscErrorCode ierr; 4647 4648 PetscFunctionBegin; 4649 /* this implements stabilized Gram-Schmidt */ 4650 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4651 for (i=0;i<n;i++) { 4652 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4653 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4654 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4655 } 4656 ierr = PetscFree(alphas);CHKERRQ(ierr); 4657 PetscFunctionReturn(0); 4658 } 4659 4660 #undef __FUNCT__ 4661 #define __FUNCT__ "MatISGetSubassemblingPattern" 4662 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4663 { 4664 IS ranks_send_to; 4665 PetscInt n_neighs,*neighs,*n_shared,**shared; 4666 PetscMPIInt size,rank,color; 4667 PetscInt *xadj,*adjncy; 4668 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4669 PetscInt i,local_size,threshold=0; 4670 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4671 PetscSubcomm subcomm; 4672 PetscErrorCode ierr; 4673 4674 PetscFunctionBegin; 4675 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4676 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4677 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4678 4679 /* Get info on mapping */ 4680 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4681 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4682 4683 /* build local CSR graph of subdomains' connectivity */ 4684 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4685 xadj[0] = 0; 4686 xadj[1] = PetscMax(n_neighs-1,0); 4687 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4688 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4689 4690 if (threshold) { 4691 PetscInt xadj_count = 0; 4692 for (i=1;i<n_neighs;i++) { 4693 if (n_shared[i] > threshold) { 4694 adjncy[xadj_count] = neighs[i]; 4695 adjncy_wgt[xadj_count] = n_shared[i]; 4696 xadj_count++; 4697 } 4698 } 4699 xadj[1] = xadj_count; 4700 } else { 4701 if (xadj[1]) { 4702 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4703 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4704 } 4705 } 4706 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4707 if (use_square) { 4708 for (i=0;i<xadj[1];i++) { 4709 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4710 } 4711 } 4712 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4713 4714 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4715 4716 /* 4717 Restrict work on active processes only. 4718 */ 4719 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4720 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4721 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4722 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4723 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4724 if (color) { 4725 ierr = PetscFree(xadj);CHKERRQ(ierr); 4726 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4727 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4728 } else { 4729 Mat subdomain_adj; 4730 IS new_ranks,new_ranks_contig; 4731 MatPartitioning partitioner; 4732 PetscInt prank,rstart=0,rend=0; 4733 PetscInt *is_indices,*oldranks; 4734 PetscBool aggregate; 4735 4736 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4737 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4738 prank = rank; 4739 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4740 /* 4741 for (i=0;i<size;i++) { 4742 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4743 } 4744 */ 4745 for (i=0;i<xadj[1];i++) { 4746 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4747 } 4748 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4749 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4750 if (aggregate) { 4751 PetscInt lrows,row,ncols,*cols; 4752 PetscMPIInt nrank; 4753 PetscScalar *vals; 4754 4755 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4756 lrows = 0; 4757 if (nrank<redprocs) { 4758 lrows = size/redprocs; 4759 if (nrank<size%redprocs) lrows++; 4760 } 4761 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4762 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4763 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4764 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4765 row = nrank; 4766 ncols = xadj[1]-xadj[0]; 4767 cols = adjncy; 4768 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4769 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4770 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4771 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4772 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4773 ierr = PetscFree(xadj);CHKERRQ(ierr); 4774 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4775 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4776 ierr = PetscFree(vals);CHKERRQ(ierr); 4777 } else { 4778 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4779 } 4780 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4781 4782 /* Partition */ 4783 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4784 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4785 if (use_vwgt) { 4786 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4787 v_wgt[0] = local_size; 4788 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4789 } 4790 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4791 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4792 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4793 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4794 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4795 4796 /* renumber new_ranks to avoid "holes" in new set of processors */ 4797 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4798 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4799 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4800 if (!redprocs) { 4801 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4802 } else { 4803 PetscInt idxs[1]; 4804 PetscMPIInt tag; 4805 MPI_Request *reqs; 4806 4807 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4808 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4809 for (i=rstart;i<rend;i++) { 4810 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4811 } 4812 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4813 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4814 ierr = PetscFree(reqs);CHKERRQ(ierr); 4815 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4816 } 4817 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4818 /* clean up */ 4819 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4820 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4821 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4822 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4823 } 4824 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4825 4826 /* assemble parallel IS for sends */ 4827 i = 1; 4828 if (color) i=0; 4829 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4830 /* get back IS */ 4831 *is_sends = ranks_send_to; 4832 PetscFunctionReturn(0); 4833 } 4834 4835 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4836 4837 #undef __FUNCT__ 4838 #define __FUNCT__ "MatISSubassemble" 4839 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4840 { 4841 Mat local_mat; 4842 IS is_sends_internal; 4843 PetscInt rows,cols,new_local_rows; 4844 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4845 PetscBool ismatis,isdense,newisdense,destroy_mat; 4846 ISLocalToGlobalMapping l2gmap; 4847 PetscInt* l2gmap_indices; 4848 const PetscInt* is_indices; 4849 MatType new_local_type; 4850 /* buffers */ 4851 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4852 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4853 PetscInt *recv_buffer_idxs_local; 4854 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4855 /* MPI */ 4856 MPI_Comm comm,comm_n; 4857 PetscSubcomm subcomm; 4858 PetscMPIInt n_sends,n_recvs,commsize; 4859 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4860 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4861 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4862 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4863 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4864 PetscErrorCode ierr; 4865 4866 PetscFunctionBegin; 4867 /* TODO: add missing checks */ 4868 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4869 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4870 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4871 PetscValidLogicalCollectiveInt(mat,nis,7); 4872 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4873 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4874 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4875 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4876 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4877 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4878 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4879 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4880 PetscInt mrows,mcols,mnrows,mncols; 4881 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4882 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4883 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4884 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4885 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4886 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4887 } 4888 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4889 PetscValidLogicalCollectiveInt(mat,bs,0); 4890 /* prepare IS for sending if not provided */ 4891 if (!is_sends) { 4892 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4893 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4894 } else { 4895 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4896 is_sends_internal = is_sends; 4897 } 4898 4899 /* get comm */ 4900 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4901 4902 /* compute number of sends */ 4903 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4904 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4905 4906 /* compute number of receives */ 4907 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4908 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4909 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4910 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4911 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4912 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4913 ierr = PetscFree(iflags);CHKERRQ(ierr); 4914 4915 /* restrict comm if requested */ 4916 subcomm = 0; 4917 destroy_mat = PETSC_FALSE; 4918 if (restrict_comm) { 4919 PetscMPIInt color,subcommsize; 4920 4921 color = 0; 4922 if (restrict_full) { 4923 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4924 } else { 4925 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4926 } 4927 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4928 subcommsize = commsize - subcommsize; 4929 /* check if reuse has been requested */ 4930 if (reuse == MAT_REUSE_MATRIX) { 4931 if (*mat_n) { 4932 PetscMPIInt subcommsize2; 4933 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4934 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4935 comm_n = PetscObjectComm((PetscObject)*mat_n); 4936 } else { 4937 comm_n = PETSC_COMM_SELF; 4938 } 4939 } else { /* MAT_INITIAL_MATRIX */ 4940 PetscMPIInt rank; 4941 4942 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4943 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4944 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4945 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4946 comm_n = PetscSubcommChild(subcomm); 4947 } 4948 /* flag to destroy *mat_n if not significative */ 4949 if (color) destroy_mat = PETSC_TRUE; 4950 } else { 4951 comm_n = comm; 4952 } 4953 4954 /* prepare send/receive buffers */ 4955 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4956 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4957 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4958 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4959 if (nis) { 4960 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4961 } 4962 4963 /* Get data from local matrices */ 4964 if (!isdense) { 4965 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4966 /* TODO: See below some guidelines on how to prepare the local buffers */ 4967 /* 4968 send_buffer_vals should contain the raw values of the local matrix 4969 send_buffer_idxs should contain: 4970 - MatType_PRIVATE type 4971 - PetscInt size_of_l2gmap 4972 - PetscInt global_row_indices[size_of_l2gmap] 4973 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4974 */ 4975 } else { 4976 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4977 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4978 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4979 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4980 send_buffer_idxs[1] = i; 4981 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4982 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4983 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4984 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4985 for (i=0;i<n_sends;i++) { 4986 ilengths_vals[is_indices[i]] = len*len; 4987 ilengths_idxs[is_indices[i]] = len+2; 4988 } 4989 } 4990 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4991 /* additional is (if any) */ 4992 if (nis) { 4993 PetscMPIInt psum; 4994 PetscInt j; 4995 for (j=0,psum=0;j<nis;j++) { 4996 PetscInt plen; 4997 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4998 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4999 psum += len+1; /* indices + lenght */ 5000 } 5001 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5002 for (j=0,psum=0;j<nis;j++) { 5003 PetscInt plen; 5004 const PetscInt *is_array_idxs; 5005 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5006 send_buffer_idxs_is[psum] = plen; 5007 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5008 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5009 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5010 psum += plen+1; /* indices + lenght */ 5011 } 5012 for (i=0;i<n_sends;i++) { 5013 ilengths_idxs_is[is_indices[i]] = psum; 5014 } 5015 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5016 } 5017 5018 buf_size_idxs = 0; 5019 buf_size_vals = 0; 5020 buf_size_idxs_is = 0; 5021 for (i=0;i<n_recvs;i++) { 5022 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5023 buf_size_vals += (PetscInt)olengths_vals[i]; 5024 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5025 } 5026 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5027 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5028 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5029 5030 /* get new tags for clean communications */ 5031 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5032 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5033 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5034 5035 /* allocate for requests */ 5036 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5037 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5038 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5039 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5040 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5041 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5042 5043 /* communications */ 5044 ptr_idxs = recv_buffer_idxs; 5045 ptr_vals = recv_buffer_vals; 5046 ptr_idxs_is = recv_buffer_idxs_is; 5047 for (i=0;i<n_recvs;i++) { 5048 source_dest = onodes[i]; 5049 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5050 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5051 ptr_idxs += olengths_idxs[i]; 5052 ptr_vals += olengths_vals[i]; 5053 if (nis) { 5054 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5055 ptr_idxs_is += olengths_idxs_is[i]; 5056 } 5057 } 5058 for (i=0;i<n_sends;i++) { 5059 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5060 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5061 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5062 if (nis) { 5063 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5064 } 5065 } 5066 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5067 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5068 5069 /* assemble new l2g map */ 5070 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5071 ptr_idxs = recv_buffer_idxs; 5072 new_local_rows = 0; 5073 for (i=0;i<n_recvs;i++) { 5074 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5075 ptr_idxs += olengths_idxs[i]; 5076 } 5077 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5078 ptr_idxs = recv_buffer_idxs; 5079 new_local_rows = 0; 5080 for (i=0;i<n_recvs;i++) { 5081 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5082 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5083 ptr_idxs += olengths_idxs[i]; 5084 } 5085 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5086 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5087 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5088 5089 /* infer new local matrix type from received local matrices type */ 5090 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5091 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5092 if (n_recvs) { 5093 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5094 ptr_idxs = recv_buffer_idxs; 5095 for (i=0;i<n_recvs;i++) { 5096 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5097 new_local_type_private = MATAIJ_PRIVATE; 5098 break; 5099 } 5100 ptr_idxs += olengths_idxs[i]; 5101 } 5102 switch (new_local_type_private) { 5103 case MATDENSE_PRIVATE: 5104 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5105 new_local_type = MATSEQAIJ; 5106 bs = 1; 5107 } else { /* if I receive only 1 dense matrix */ 5108 new_local_type = MATSEQDENSE; 5109 bs = 1; 5110 } 5111 break; 5112 case MATAIJ_PRIVATE: 5113 new_local_type = MATSEQAIJ; 5114 bs = 1; 5115 break; 5116 case MATBAIJ_PRIVATE: 5117 new_local_type = MATSEQBAIJ; 5118 break; 5119 case MATSBAIJ_PRIVATE: 5120 new_local_type = MATSEQSBAIJ; 5121 break; 5122 default: 5123 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5124 break; 5125 } 5126 } else { /* by default, new_local_type is seqdense */ 5127 new_local_type = MATSEQDENSE; 5128 bs = 1; 5129 } 5130 5131 /* create MATIS object if needed */ 5132 if (reuse == MAT_INITIAL_MATRIX) { 5133 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5134 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5135 } else { 5136 /* it also destroys the local matrices */ 5137 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5138 } 5139 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5140 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5141 5142 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5143 5144 /* Global to local map of received indices */ 5145 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5146 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5147 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5148 5149 /* restore attributes -> type of incoming data and its size */ 5150 buf_size_idxs = 0; 5151 for (i=0;i<n_recvs;i++) { 5152 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5153 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5154 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5155 } 5156 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5157 5158 /* set preallocation */ 5159 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5160 if (!newisdense) { 5161 PetscInt *new_local_nnz=0; 5162 5163 ptr_vals = recv_buffer_vals; 5164 ptr_idxs = recv_buffer_idxs_local; 5165 if (n_recvs) { 5166 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5167 } 5168 for (i=0;i<n_recvs;i++) { 5169 PetscInt j; 5170 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5171 for (j=0;j<*(ptr_idxs+1);j++) { 5172 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5173 } 5174 } else { 5175 /* TODO */ 5176 } 5177 ptr_idxs += olengths_idxs[i]; 5178 } 5179 if (new_local_nnz) { 5180 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5181 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5182 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5183 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5184 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5185 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5186 } else { 5187 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5188 } 5189 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5190 } else { 5191 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5192 } 5193 5194 /* set values */ 5195 ptr_vals = recv_buffer_vals; 5196 ptr_idxs = recv_buffer_idxs_local; 5197 for (i=0;i<n_recvs;i++) { 5198 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5199 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5200 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5201 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5202 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5203 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5204 } else { 5205 /* TODO */ 5206 } 5207 ptr_idxs += olengths_idxs[i]; 5208 ptr_vals += olengths_vals[i]; 5209 } 5210 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5211 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5212 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5213 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5214 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5215 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5216 5217 #if 0 5218 if (!restrict_comm) { /* check */ 5219 Vec lvec,rvec; 5220 PetscReal infty_error; 5221 5222 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5223 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5224 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5225 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5226 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5227 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5228 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5229 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5230 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5231 } 5232 #endif 5233 5234 /* assemble new additional is (if any) */ 5235 if (nis) { 5236 PetscInt **temp_idxs,*count_is,j,psum; 5237 5238 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5239 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5240 ptr_idxs = recv_buffer_idxs_is; 5241 psum = 0; 5242 for (i=0;i<n_recvs;i++) { 5243 for (j=0;j<nis;j++) { 5244 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5245 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5246 psum += plen; 5247 ptr_idxs += plen+1; /* shift pointer to received data */ 5248 } 5249 } 5250 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5251 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5252 for (i=1;i<nis;i++) { 5253 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5254 } 5255 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5256 ptr_idxs = recv_buffer_idxs_is; 5257 for (i=0;i<n_recvs;i++) { 5258 for (j=0;j<nis;j++) { 5259 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5260 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5261 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5262 ptr_idxs += plen+1; /* shift pointer to received data */ 5263 } 5264 } 5265 for (i=0;i<nis;i++) { 5266 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5267 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5268 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5269 } 5270 ierr = PetscFree(count_is);CHKERRQ(ierr); 5271 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5272 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5273 } 5274 /* free workspace */ 5275 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5276 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5277 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5278 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5279 if (isdense) { 5280 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5281 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5282 } else { 5283 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5284 } 5285 if (nis) { 5286 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5287 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5288 } 5289 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5290 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5291 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5292 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5293 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5294 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5295 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5296 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5297 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5298 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5299 ierr = PetscFree(onodes);CHKERRQ(ierr); 5300 if (nis) { 5301 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5302 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5303 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5304 } 5305 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5306 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5307 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5308 for (i=0;i<nis;i++) { 5309 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5310 } 5311 *mat_n = NULL; 5312 } 5313 PetscFunctionReturn(0); 5314 } 5315 5316 /* temporary hack into ksp private data structure */ 5317 #include <petsc/private/kspimpl.h> 5318 5319 #undef __FUNCT__ 5320 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5321 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5322 { 5323 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5324 PC_IS *pcis = (PC_IS*)pc->data; 5325 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5326 MatNullSpace CoarseNullSpace=NULL; 5327 ISLocalToGlobalMapping coarse_islg; 5328 IS coarse_is,*isarray; 5329 PetscInt i,im_active=-1,active_procs=-1; 5330 PetscInt nis,nisdofs,nisneu,nisvert; 5331 PC pc_temp; 5332 PCType coarse_pc_type; 5333 KSPType coarse_ksp_type; 5334 PetscBool multilevel_requested,multilevel_allowed; 5335 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5336 Mat t_coarse_mat_is; 5337 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5338 PetscMPIInt all_procs; 5339 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5340 PetscBool compute_vecs = PETSC_FALSE; 5341 PetscScalar *array; 5342 PetscErrorCode ierr; 5343 5344 PetscFunctionBegin; 5345 /* Assign global numbering to coarse dofs */ 5346 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5347 PetscInt ocoarse_size; 5348 compute_vecs = PETSC_TRUE; 5349 ocoarse_size = pcbddc->coarse_size; 5350 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5351 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5352 /* see if we can avoid some work */ 5353 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5354 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5355 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5356 PC pc; 5357 PetscBool isbddc; 5358 5359 /* temporary workaround since PCBDDC does not have a reset method so far */ 5360 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5361 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5362 if (isbddc) { 5363 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5364 } else { 5365 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5366 } 5367 coarse_reuse = PETSC_FALSE; 5368 } else { /* we can safely reuse already computed coarse matrix */ 5369 coarse_reuse = PETSC_TRUE; 5370 } 5371 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5372 coarse_reuse = PETSC_FALSE; 5373 } 5374 /* reset any subassembling information */ 5375 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5376 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5377 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5378 coarse_reuse = PETSC_TRUE; 5379 } 5380 5381 /* count "active" (i.e. with positive local size) and "void" processes */ 5382 im_active = !!(pcis->n); 5383 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5384 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5385 void_procs = all_procs-active_procs; 5386 csin_type_simple = PETSC_TRUE; 5387 redist = PETSC_FALSE; 5388 if (pcbddc->current_level && void_procs) { 5389 csin_ml = PETSC_TRUE; 5390 ncoarse_ml = void_procs; 5391 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5392 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5393 csin_ds = PETSC_TRUE; 5394 ncoarse_ds = pcbddc->redistribute_coarse; 5395 redist = PETSC_TRUE; 5396 } else { 5397 csin_ds = PETSC_TRUE; 5398 ncoarse_ds = active_procs; 5399 redist = PETSC_TRUE; 5400 } 5401 } else { 5402 csin_ml = PETSC_FALSE; 5403 ncoarse_ml = all_procs; 5404 if (void_procs) { 5405 csin_ds = PETSC_TRUE; 5406 ncoarse_ds = void_procs; 5407 csin_type_simple = PETSC_FALSE; 5408 } else { 5409 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5410 csin_ds = PETSC_TRUE; 5411 ncoarse_ds = pcbddc->redistribute_coarse; 5412 redist = PETSC_TRUE; 5413 } else { 5414 csin_ds = PETSC_FALSE; 5415 ncoarse_ds = all_procs; 5416 } 5417 } 5418 } 5419 5420 /* 5421 test if we can go multilevel: three conditions must be satisfied: 5422 - we have not exceeded the number of levels requested 5423 - we can actually subassemble the active processes 5424 - we can find a suitable number of MPI processes where we can place the subassembled problem 5425 */ 5426 multilevel_allowed = PETSC_FALSE; 5427 multilevel_requested = PETSC_FALSE; 5428 if (pcbddc->current_level < pcbddc->max_levels) { 5429 multilevel_requested = PETSC_TRUE; 5430 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5431 multilevel_allowed = PETSC_FALSE; 5432 } else { 5433 multilevel_allowed = PETSC_TRUE; 5434 } 5435 } 5436 /* determine number of process partecipating to coarse solver */ 5437 if (multilevel_allowed) { 5438 ncoarse = ncoarse_ml; 5439 csin = csin_ml; 5440 redist = PETSC_FALSE; 5441 } else { 5442 ncoarse = ncoarse_ds; 5443 csin = csin_ds; 5444 } 5445 5446 /* creates temporary l2gmap and IS for coarse indexes */ 5447 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5448 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5449 5450 /* creates temporary MATIS object for coarse matrix */ 5451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5452 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5453 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5454 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5455 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5456 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5457 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5458 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5459 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5460 5461 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5462 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5463 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5464 const PetscInt *idxs; 5465 ISLocalToGlobalMapping tmap; 5466 5467 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5468 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5469 /* allocate space for temporary storage */ 5470 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5471 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5472 /* allocate for IS array */ 5473 nisdofs = pcbddc->n_ISForDofsLocal; 5474 nisneu = !!pcbddc->NeumannBoundariesLocal; 5475 nisvert = 0; 5476 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5477 nisvert = 1; 5478 } 5479 nis = nisdofs + nisneu + nisvert; 5480 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5481 /* dofs splitting */ 5482 for (i=0;i<nisdofs;i++) { 5483 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5484 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5485 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5486 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5487 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5488 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5489 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5490 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5491 } 5492 /* neumann boundaries */ 5493 if (pcbddc->NeumannBoundariesLocal) { 5494 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5495 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5496 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5497 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5498 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5499 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5500 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5501 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5502 } 5503 /* primal vertices (benign) */ 5504 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5505 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5506 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5507 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5508 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5509 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5511 } 5512 /* free memory */ 5513 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5514 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5515 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5516 } else { 5517 nis = 0; 5518 nisdofs = 0; 5519 nisneu = 0; 5520 nisvert = 0; 5521 isarray = NULL; 5522 } 5523 /* destroy no longer needed map */ 5524 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5525 5526 /* restrict on coarse candidates (if needed) */ 5527 coarse_mat_is = NULL; 5528 if (csin) { 5529 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5530 if (redist) { 5531 PetscMPIInt rank; 5532 PetscInt spc,n_spc_p1,dest[1],destsize; 5533 5534 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5535 spc = active_procs/ncoarse; 5536 n_spc_p1 = active_procs%ncoarse; 5537 if (im_active) { 5538 destsize = 1; 5539 if (rank > n_spc_p1*(spc+1)-1) { 5540 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5541 } else { 5542 dest[0] = rank/(spc+1); 5543 } 5544 } else { 5545 destsize = 0; 5546 } 5547 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5548 } else if (csin_type_simple) { 5549 PetscMPIInt rank; 5550 PetscInt issize,isidx; 5551 5552 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5553 if (im_active) { 5554 issize = 1; 5555 isidx = (PetscInt)rank; 5556 } else { 5557 issize = 0; 5558 isidx = -1; 5559 } 5560 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5561 } else { /* get a suitable subassembling pattern from MATIS code */ 5562 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5563 } 5564 5565 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5566 if (!redist || ncoarse <= void_procs) { 5567 PetscInt ncoarse_cand,tissize,*nisindices; 5568 PetscInt *coarse_candidates; 5569 const PetscInt* tisindices; 5570 5571 /* get coarse candidates' ranks in pc communicator */ 5572 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5573 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5574 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5575 if (!coarse_candidates[i]) { 5576 coarse_candidates[ncoarse_cand++]=i; 5577 } 5578 } 5579 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5580 5581 5582 if (pcbddc->dbg_flag) { 5583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5585 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5587 for (i=0;i<ncoarse_cand;i++) { 5588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5589 } 5590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5592 } 5593 /* shift the pattern on coarse candidates */ 5594 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5595 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5596 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5597 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5598 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5599 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5600 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5601 } 5602 if (pcbddc->dbg_flag) { 5603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5604 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5605 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5607 } 5608 } 5609 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5610 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5611 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5612 } else { /* this is the last level, so use just receiving processes in subcomm */ 5613 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5614 } 5615 } else { 5616 if (pcbddc->dbg_flag) { 5617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5619 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5620 } 5621 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5622 coarse_mat_is = t_coarse_mat_is; 5623 } 5624 5625 /* create local to global scatters for coarse problem */ 5626 if (compute_vecs) { 5627 PetscInt lrows; 5628 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5629 if (coarse_mat_is) { 5630 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5631 } else { 5632 lrows = 0; 5633 } 5634 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5635 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5636 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5637 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5638 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5639 } 5640 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5641 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5642 5643 /* set defaults for coarse KSP and PC */ 5644 if (multilevel_allowed) { 5645 coarse_ksp_type = KSPRICHARDSON; 5646 coarse_pc_type = PCBDDC; 5647 } else { 5648 coarse_ksp_type = KSPPREONLY; 5649 coarse_pc_type = PCREDUNDANT; 5650 } 5651 5652 /* print some info if requested */ 5653 if (pcbddc->dbg_flag) { 5654 if (!multilevel_allowed) { 5655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5656 if (multilevel_requested) { 5657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5658 } else if (pcbddc->max_levels) { 5659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5660 } 5661 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5662 } 5663 } 5664 5665 /* create the coarse KSP object only once with defaults */ 5666 if (coarse_mat_is) { 5667 MatReuse coarse_mat_reuse; 5668 PetscViewer dbg_viewer = NULL; 5669 if (pcbddc->dbg_flag) { 5670 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5671 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5672 } 5673 if (!pcbddc->coarse_ksp) { 5674 char prefix[256],str_level[16]; 5675 size_t len; 5676 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5677 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5678 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5679 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5680 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5681 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5682 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5683 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5684 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5685 /* prefix */ 5686 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5687 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5688 if (!pcbddc->current_level) { 5689 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5690 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5691 } else { 5692 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5693 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5694 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5695 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5696 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5697 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5698 } 5699 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5700 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5701 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5702 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5703 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5704 /* allow user customization */ 5705 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5706 } 5707 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5708 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5709 if (nisdofs) { 5710 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5711 for (i=0;i<nisdofs;i++) { 5712 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5713 } 5714 } 5715 if (nisneu) { 5716 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5717 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5718 } 5719 if (nisvert) { 5720 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5721 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5722 } 5723 5724 /* get some info after set from options */ 5725 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5726 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5727 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5728 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5729 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5730 isbddc = PETSC_FALSE; 5731 } 5732 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5733 if (isredundant) { 5734 KSP inner_ksp; 5735 PC inner_pc; 5736 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5737 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5738 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5739 } 5740 5741 /* assemble coarse matrix */ 5742 if (coarse_reuse) { 5743 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5744 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5745 coarse_mat_reuse = MAT_REUSE_MATRIX; 5746 } else { 5747 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5748 } 5749 if (isbddc || isnn) { 5750 if (isbddc) { /* currently there are no APIs for these options */ 5751 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5752 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5753 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5754 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5755 if (pcbddc_coarse->benign_compute_nonetflux) { 5756 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5757 } 5758 } 5759 if (pcbddc->coarsening_ratio > 1) { 5760 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5761 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5762 if (pcbddc->dbg_flag) { 5763 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5764 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5765 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5766 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5767 } 5768 } 5769 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5770 } else { 5771 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5772 coarse_mat = coarse_mat_is; 5773 } 5774 } else { 5775 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5776 } 5777 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5778 5779 /* propagate symmetry info of coarse matrix */ 5780 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5781 if (pc->pmat->symmetric_set) { 5782 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5783 } 5784 if (pc->pmat->hermitian_set) { 5785 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5786 } 5787 if (pc->pmat->spd_set) { 5788 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5789 } 5790 /* set operators */ 5791 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5792 if (pcbddc->dbg_flag) { 5793 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5794 } 5795 } else { /* processes non partecipating to coarse solver (if any) */ 5796 coarse_mat = 0; 5797 } 5798 ierr = PetscFree(isarray);CHKERRQ(ierr); 5799 #if 0 5800 { 5801 PetscViewer viewer; 5802 char filename[256]; 5803 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5804 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5805 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5806 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5807 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5808 } 5809 #endif 5810 5811 /* Compute coarse null space (special handling by BDDC only) */ 5812 #if 0 5813 if (pcbddc->NullSpace) { 5814 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5815 } 5816 #endif 5817 /* hack */ 5818 if (pcbddc->coarse_ksp) { 5819 Vec crhs,csol; 5820 5821 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5822 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5823 if (!csol) { 5824 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5825 } 5826 if (!crhs) { 5827 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5828 } 5829 } 5830 5831 /* compute null space for coarse solver if the benign trick has been requested */ 5832 if (pcbddc->benign_null) { 5833 5834 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5835 for (i=0;i<pcbddc->benign_n;i++) { 5836 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5837 } 5838 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5839 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5840 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5841 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5842 if (coarse_mat) { 5843 Vec nullv; 5844 PetscScalar *array,*array2; 5845 PetscInt nl; 5846 5847 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5848 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5849 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5850 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5851 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5852 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5853 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5854 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5855 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5856 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5857 } 5858 } 5859 5860 if (pcbddc->coarse_ksp) { 5861 PetscBool ispreonly; 5862 5863 if (CoarseNullSpace) { 5864 PetscBool isnull; 5865 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5866 if (0) { 5867 if (isbddc && !pcbddc->benign_saddle_point) { 5868 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5869 } else { 5870 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5871 } 5872 } else { 5873 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5874 } 5875 } 5876 /* setup coarse ksp */ 5877 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5878 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5879 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5880 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5881 KSP check_ksp; 5882 KSPType check_ksp_type; 5883 PC check_pc; 5884 Vec check_vec,coarse_vec; 5885 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5886 PetscInt its; 5887 PetscBool compute_eigs; 5888 PetscReal *eigs_r,*eigs_c; 5889 PetscInt neigs; 5890 const char *prefix; 5891 5892 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5893 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5894 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5895 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5896 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5897 if (ispreonly) { 5898 check_ksp_type = KSPPREONLY; 5899 compute_eigs = PETSC_FALSE; 5900 } else { 5901 check_ksp_type = KSPGMRES; 5902 compute_eigs = PETSC_TRUE; 5903 } 5904 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5905 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5906 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5907 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5908 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5909 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5910 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5911 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5912 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5913 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5914 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5915 /* create random vec */ 5916 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5917 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5918 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5919 if (CoarseNullSpace) { 5920 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5921 } 5922 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5923 /* solve coarse problem */ 5924 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5925 if (CoarseNullSpace) { 5926 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5927 } 5928 /* set eigenvalue estimation if preonly has not been requested */ 5929 if (compute_eigs) { 5930 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5931 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5932 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5933 lambda_max = eigs_r[neigs-1]; 5934 lambda_min = eigs_r[0]; 5935 if (pcbddc->use_coarse_estimates) { 5936 if (lambda_max>lambda_min) { 5937 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5938 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5939 } 5940 } 5941 } 5942 5943 /* check coarse problem residual error */ 5944 if (pcbddc->dbg_flag) { 5945 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5946 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5947 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5948 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5949 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5950 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5951 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5952 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5953 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5954 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5955 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5956 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5957 if (CoarseNullSpace) { 5958 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5959 } 5960 if (compute_eigs) { 5961 PetscReal lambda_max_s,lambda_min_s; 5962 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5963 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5964 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5965 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5966 for (i=0;i<neigs;i++) { 5967 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5968 } 5969 } 5970 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5971 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5972 } 5973 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5974 if (compute_eigs) { 5975 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5976 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5977 } 5978 } 5979 } 5980 /* print additional info */ 5981 if (pcbddc->dbg_flag) { 5982 /* waits until all processes reaches this point */ 5983 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5986 } 5987 5988 /* free memory */ 5989 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5990 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5991 PetscFunctionReturn(0); 5992 } 5993 5994 #undef __FUNCT__ 5995 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5996 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5997 { 5998 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5999 PC_IS* pcis = (PC_IS*)pc->data; 6000 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6001 IS subset,subset_mult,subset_n; 6002 PetscInt local_size,coarse_size=0; 6003 PetscInt *local_primal_indices=NULL; 6004 const PetscInt *t_local_primal_indices; 6005 PetscErrorCode ierr; 6006 6007 PetscFunctionBegin; 6008 /* Compute global number of coarse dofs */ 6009 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 6010 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6011 } 6012 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6013 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6014 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6015 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6016 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6017 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6018 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6019 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6020 if (local_size != pcbddc->local_primal_size) { 6021 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 6022 } 6023 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6024 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6025 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6026 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6027 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6028 6029 /* check numbering */ 6030 if (pcbddc->dbg_flag) { 6031 PetscScalar coarsesum,*array,*array2; 6032 PetscInt i; 6033 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6034 6035 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6038 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6039 /* counter */ 6040 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6041 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6042 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6043 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6044 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6045 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6046 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6047 for (i=0;i<pcbddc->local_primal_size;i++) { 6048 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6049 } 6050 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6051 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6052 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6053 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6054 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6055 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6056 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6057 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6058 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6059 for (i=0;i<pcis->n;i++) { 6060 if (array[i] != 0.0 && array[i] != array2[i]) { 6061 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6062 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6063 set_error = PETSC_TRUE; 6064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6065 } 6066 } 6067 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6068 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6070 for (i=0;i<pcis->n;i++) { 6071 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6072 } 6073 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6074 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6075 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6076 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6077 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6078 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6079 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6080 PetscInt *gidxs; 6081 6082 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6083 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6087 for (i=0;i<pcbddc->local_primal_size;i++) { 6088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6089 } 6090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6091 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6092 } 6093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6094 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6095 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6096 } 6097 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6098 /* get back data */ 6099 *coarse_size_n = coarse_size; 6100 *local_primal_indices_n = local_primal_indices; 6101 PetscFunctionReturn(0); 6102 } 6103 6104 #undef __FUNCT__ 6105 #define __FUNCT__ "PCBDDCGlobalToLocal" 6106 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6107 { 6108 IS localis_t; 6109 PetscInt i,lsize,*idxs,n; 6110 PetscScalar *vals; 6111 PetscErrorCode ierr; 6112 6113 PetscFunctionBegin; 6114 /* get indices in local ordering exploiting local to global map */ 6115 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6116 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6117 for (i=0;i<lsize;i++) vals[i] = 1.0; 6118 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6119 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6120 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6121 if (idxs) { /* multilevel guard */ 6122 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6123 } 6124 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6125 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6126 ierr = PetscFree(vals);CHKERRQ(ierr); 6127 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6128 /* now compute set in local ordering */ 6129 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6130 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6131 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6132 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6133 for (i=0,lsize=0;i<n;i++) { 6134 if (PetscRealPart(vals[i]) > 0.5) { 6135 lsize++; 6136 } 6137 } 6138 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6139 for (i=0,lsize=0;i<n;i++) { 6140 if (PetscRealPart(vals[i]) > 0.5) { 6141 idxs[lsize++] = i; 6142 } 6143 } 6144 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6146 *localis = localis_t; 6147 PetscFunctionReturn(0); 6148 } 6149 6150 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6151 #undef __FUNCT__ 6152 #define __FUNCT__ "PCBDDCMatMult_Private" 6153 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6154 { 6155 PCBDDCChange_ctx change_ctx; 6156 PetscErrorCode ierr; 6157 6158 PetscFunctionBegin; 6159 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6160 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6161 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6162 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6163 PetscFunctionReturn(0); 6164 } 6165 6166 #undef __FUNCT__ 6167 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6168 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6169 { 6170 PCBDDCChange_ctx change_ctx; 6171 PetscErrorCode ierr; 6172 6173 PetscFunctionBegin; 6174 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6175 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6176 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6177 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6178 PetscFunctionReturn(0); 6179 } 6180 6181 #undef __FUNCT__ 6182 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6183 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6184 { 6185 PC_IS *pcis=(PC_IS*)pc->data; 6186 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6187 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6188 Mat S_j; 6189 PetscInt *used_xadj,*used_adjncy; 6190 PetscBool free_used_adj; 6191 PetscErrorCode ierr; 6192 6193 PetscFunctionBegin; 6194 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6195 free_used_adj = PETSC_FALSE; 6196 if (pcbddc->sub_schurs_layers == -1) { 6197 used_xadj = NULL; 6198 used_adjncy = NULL; 6199 } else { 6200 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6201 used_xadj = pcbddc->mat_graph->xadj; 6202 used_adjncy = pcbddc->mat_graph->adjncy; 6203 } else if (pcbddc->computed_rowadj) { 6204 used_xadj = pcbddc->mat_graph->xadj; 6205 used_adjncy = pcbddc->mat_graph->adjncy; 6206 } else { 6207 PetscBool flg_row=PETSC_FALSE; 6208 const PetscInt *xadj,*adjncy; 6209 PetscInt nvtxs; 6210 6211 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6212 if (flg_row) { 6213 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6214 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6215 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6216 free_used_adj = PETSC_TRUE; 6217 } else { 6218 pcbddc->sub_schurs_layers = -1; 6219 used_xadj = NULL; 6220 used_adjncy = NULL; 6221 } 6222 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6223 } 6224 } 6225 6226 /* setup sub_schurs data */ 6227 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6228 if (!sub_schurs->schur_explicit) { 6229 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6230 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6231 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6232 } else { 6233 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6234 PetscBool isseqaij; 6235 PetscInt benign_n; 6236 Mat change = NULL; 6237 IS change_primal = NULL; 6238 6239 if (!pcbddc->use_vertices && reuse_solvers) { 6240 PetscInt n_vertices; 6241 6242 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6243 reuse_solvers = (PetscBool)!n_vertices; 6244 } 6245 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6246 if (!isseqaij) { 6247 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6248 if (matis->A == pcbddc->local_mat) { 6249 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6250 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6251 } else { 6252 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6253 } 6254 } 6255 if (!pcbddc->benign_change_explicit) { 6256 benign_n = pcbddc->benign_n; 6257 } else { 6258 benign_n = 0; 6259 } 6260 /* If the user defines additional constraints, we import them here. 6261 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6262 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6263 PC_IS *pcisf; 6264 PC_BDDC *pcbddcf; 6265 PC pcf; 6266 6267 if (pcbddc->sub_schurs_rebuild) { 6268 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6269 } 6270 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6271 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6272 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6273 /* hacks */ 6274 pcisf = (PC_IS*)pcf->data; 6275 pcisf->is_B_local = pcis->is_B_local; 6276 pcisf->vec1_N = pcis->vec1_N; 6277 pcisf->BtoNmap = pcis->BtoNmap; 6278 pcisf->n = pcis->n; 6279 pcisf->n_B = pcis->n_B; 6280 pcbddcf = (PC_BDDC*)pcf->data; 6281 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6282 pcbddcf->mat_graph = pcbddc->mat_graph; 6283 pcbddcf->use_faces = PETSC_TRUE; 6284 pcbddcf->use_change_of_basis = PETSC_TRUE; 6285 pcbddcf->use_change_on_faces = PETSC_TRUE; 6286 pcbddcf->use_qr_single = PETSC_TRUE; 6287 pcbddcf->fake_change = PETSC_TRUE; 6288 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6289 /* store information on primal vertices and change of basis (in local numbering) */ 6290 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6291 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6292 change = pcbddcf->ConstraintMatrix; 6293 pcbddcf->ConstraintMatrix = NULL; 6294 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6295 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6296 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6297 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6298 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6299 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6300 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6301 pcf->ops->destroy = NULL; 6302 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6303 } 6304 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6305 ierr = MatDestroy(&change);CHKERRQ(ierr); 6306 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6307 } 6308 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6309 6310 /* free adjacency */ 6311 if (free_used_adj) { 6312 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6313 } 6314 PetscFunctionReturn(0); 6315 } 6316 6317 #undef __FUNCT__ 6318 #define __FUNCT__ "PCBDDCInitSubSchurs" 6319 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6320 { 6321 PC_IS *pcis=(PC_IS*)pc->data; 6322 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6323 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6324 PCBDDCGraph graph; 6325 PetscErrorCode ierr; 6326 6327 PetscFunctionBegin; 6328 /* attach interface graph for determining subsets */ 6329 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6330 IS verticesIS,verticescomm; 6331 PetscInt vsize,*idxs; 6332 6333 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6334 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6335 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6336 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6337 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6338 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6339 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6340 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6341 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6342 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6343 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6344 /* 6345 if (pcbddc->dbg_flag) { 6346 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6347 } 6348 */ 6349 } else { 6350 graph = pcbddc->mat_graph; 6351 } 6352 6353 /* sub_schurs init */ 6354 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6355 6356 /* free graph struct */ 6357 if (pcbddc->sub_schurs_rebuild) { 6358 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6359 } 6360 PetscFunctionReturn(0); 6361 } 6362 6363 #undef __FUNCT__ 6364 #define __FUNCT__ "PCBDDCCheckOperator" 6365 PetscErrorCode PCBDDCCheckOperator(PC pc) 6366 { 6367 PC_IS *pcis=(PC_IS*)pc->data; 6368 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6369 PetscErrorCode ierr; 6370 6371 PetscFunctionBegin; 6372 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6373 IS zerodiag = NULL; 6374 Mat S_j,B0_B=NULL; 6375 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6376 PetscScalar *p0_check,*array,*array2; 6377 PetscReal norm; 6378 PetscInt i; 6379 6380 /* B0 and B0_B */ 6381 if (zerodiag) { 6382 IS dummy; 6383 6384 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6385 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6386 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6387 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6388 } 6389 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6390 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6391 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6392 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6393 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6394 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6395 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6396 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6397 /* S_j */ 6398 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6399 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6400 6401 /* mimic vector in \widetilde{W}_\Gamma */ 6402 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6403 /* continuous in primal space */ 6404 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6405 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6406 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6407 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6408 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6409 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6410 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6411 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6412 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6413 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6414 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6415 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6416 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6417 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6418 6419 /* assemble rhs for coarse problem */ 6420 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6421 /* local with Schur */ 6422 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6423 if (zerodiag) { 6424 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6425 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6426 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6427 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6428 } 6429 /* sum on primal nodes the local contributions */ 6430 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6431 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6432 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6433 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6434 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6435 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6436 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6437 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6438 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6439 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6440 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6441 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6442 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6443 /* scale primal nodes (BDDC sums contibutions) */ 6444 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6445 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6446 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6447 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6448 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6449 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6450 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6451 /* global: \widetilde{B0}_B w_\Gamma */ 6452 if (zerodiag) { 6453 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6454 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6455 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6456 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6457 } 6458 /* BDDC */ 6459 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6460 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6461 6462 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6463 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6464 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6465 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6466 for (i=0;i<pcbddc->benign_n;i++) { 6467 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6468 } 6469 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6470 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6471 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6472 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6473 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6474 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6475 } 6476 PetscFunctionReturn(0); 6477 } 6478