1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 cum2 = cum; 1001 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1002 } 1003 1004 if (mss) { /* multilevel */ 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1006 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1007 } 1008 1009 thresh = pcbddc->adaptive_threshold; 1010 for (i=0;i<sub_schurs->n_subs;i++) { 1011 const PetscInt *idxs; 1012 PetscReal infty = PETSC_MAX_REAL; 1013 PetscInt j,subset_size,eigs_start = 0; 1014 PetscBLASInt B_N; 1015 PetscBool same_data = PETSC_FALSE; 1016 1017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1018 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1019 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1020 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1021 if (sub_schurs->is_hermitian) { 1022 PetscInt j,k; 1023 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1024 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1026 } 1027 for (j=0;j<subset_size;j++) { 1028 for (k=j;k<subset_size;k++) { 1029 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1030 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1031 } 1032 } 1033 } else { 1034 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1036 } 1037 } else { 1038 S = Sarray + cumarray; 1039 St = Starray + cumarray; 1040 } 1041 1042 /* see if we can save some work */ 1043 if (sub_schurs->n_subs == 1) { 1044 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1045 } 1046 1047 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1048 B_neigs = 0; 1049 } else { 1050 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1051 PetscBLASInt B_itype = 1; 1052 PetscBLASInt B_IL, B_IU; 1053 PetscReal eps = -1.0; /* dlamch? */ 1054 PetscInt nmin_s; 1055 1056 if (pcbddc->dbg_flag) { 1057 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1058 } 1059 1060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1061 if (thresh > 1.+PETSC_SMALL) { 1062 1063 /* ask for eigenvalues larger than thresh */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1066 #else 1067 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1068 #endif 1069 } else { 1070 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1071 B_IL = 1; 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1074 #else 1075 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1076 #endif 1077 } 1078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1079 if (B_ierr) { 1080 if (B_ierr < 0 ) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1082 } else if (B_ierr <= B_N) { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1084 } else { 1085 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1086 } 1087 } 1088 1089 if (B_neigs > nmax) { 1090 if (pcbddc->dbg_flag) { 1091 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1092 } 1093 eigs_start = B_neigs -nmax; 1094 B_neigs = nmax; 1095 } 1096 1097 nmin_s = PetscMin(nmin,B_N); 1098 if (B_neigs < nmin_s) { 1099 PetscBLASInt B_neigs2; 1100 1101 B_IU = B_N - B_neigs; 1102 B_IL = B_N - nmin_s + 1; 1103 if (pcbddc->dbg_flag) { 1104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1105 } 1106 if (sub_schurs->is_hermitian) { 1107 PetscInt j; 1108 for (j=0;j<subset_size;j++) { 1109 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1110 } 1111 for (j=0;j<subset_size;j++) { 1112 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1113 } 1114 } else { 1115 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1116 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1117 } 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_COMPLEX) 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1121 #else 1122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1123 #endif 1124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1125 B_neigs += B_neigs2; 1126 } 1127 if (B_ierr) { 1128 if (B_ierr < 0 ) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1130 } else if (B_ierr <= B_N) { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1132 } else { 1133 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 } 1136 if (pcbddc->dbg_flag) { 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1138 for (j=0;j<B_neigs;j++) { 1139 if (eigs[j] == 0.0) { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1141 } else { 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1143 } 1144 } 1145 } 1146 } else { 1147 /* TODO */ 1148 } 1149 } 1150 maxneigs = PetscMax(B_neigs,maxneigs); 1151 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1152 if (B_neigs) { 1153 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1154 1155 if (pcbddc->dbg_flag > 1) { 1156 PetscInt ii; 1157 for (ii=0;ii<B_neigs;ii++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1159 for (j=0;j<B_N;j++) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1162 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1166 #endif 1167 } 1168 } 1169 } 1170 #if 0 1171 for (j=0;j<B_neigs;j++) { 1172 PetscBLASInt Blas_N,Blas_one = 1.0; 1173 PetscScalar norm; 1174 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1175 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1176 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1177 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1178 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1179 } else { 1180 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1181 } 1182 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 } 1184 #endif 1185 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1188 cum++; 1189 } 1190 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1191 /* shift for next computation */ 1192 cumarray += subset_size*subset_size; 1193 } 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1196 } 1197 1198 if (mss) { 1199 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1200 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1201 /* destroy matrices (junk) */ 1202 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1203 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1204 } 1205 if (allocated_S_St) { 1206 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1207 } 1208 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = PetscFree(rwork);CHKERRQ(ierr); 1211 #endif 1212 if (pcbddc->dbg_flag) { 1213 PetscInt maxneigs_r; 1214 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCSetUpSolvers" 1222 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1223 { 1224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1225 PetscScalar *coarse_submat_vals; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1230 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1231 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1232 1233 /* Setup local neumann solver ksp_R */ 1234 /* PCBDDCSetUpLocalScatters should be called first! */ 1235 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1236 1237 /* Change global null space passed in by the user if change of basis has been requested */ 1238 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1239 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1240 } 1241 1242 /* 1243 Setup local correction and local part of coarse basis. 1244 Gives back the dense local part of the coarse matrix in column major ordering 1245 */ 1246 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1247 1248 /* Compute total number of coarse nodes and setup coarse solver */ 1249 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* free */ 1252 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCResetCustomization" 1258 PetscErrorCode PCBDDCResetCustomization(PC pc) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1265 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1266 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1267 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1270 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1271 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1272 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1275 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "PCBDDCResetTopography" 1281 PetscErrorCode PCBDDCResetTopography(PC pc) 1282 { 1283 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1284 PetscInt i; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1291 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1292 for (i=0;i<pcbddc->n_local_subs;i++) { 1293 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1296 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCResetSolvers" 1302 PetscErrorCode PCBDDCResetSolvers(PC pc) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1309 if (pcbddc->coarse_phi_B) { 1310 PetscScalar *array; 1311 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1312 ierr = PetscFree(array);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1316 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1318 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1319 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1328 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1329 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1330 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1332 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1333 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1335 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1336 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1338 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1341 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1342 if (pcbddc->benign_zerodiag_subs) { 1343 PetscInt i; 1344 for (i=0;i<pcbddc->benign_n;i++) { 1345 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1355 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1356 { 1357 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1358 PC_IS *pcis = (PC_IS*)pc->data; 1359 VecType impVecType; 1360 PetscInt n_constraints,n_R,old_size; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (!pcbddc->ConstraintMatrix) { 1365 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1366 } 1367 /* get sizes */ 1368 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1369 n_R = pcis->n - pcbddc->n_vertices; 1370 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1371 /* local work vectors (try to avoid unneeded work)*/ 1372 /* R nodes */ 1373 old_size = -1; 1374 if (pcbddc->vec1_R) { 1375 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1376 } 1377 if (n_R != old_size) { 1378 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1379 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1380 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1381 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1382 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1383 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1384 } 1385 /* local primal dofs */ 1386 old_size = -1; 1387 if (pcbddc->vec1_P) { 1388 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1389 } 1390 if (pcbddc->local_primal_size != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1395 } 1396 /* local explicit constraints */ 1397 old_size = -1; 1398 if (pcbddc->vec1_C) { 1399 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1400 } 1401 if (n_constraints && n_constraints != old_size) { 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1404 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1405 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpCorrection" 1412 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1413 { 1414 PetscErrorCode ierr; 1415 /* pointers to pcis and pcbddc */ 1416 PC_IS* pcis = (PC_IS*)pc->data; 1417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1418 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1419 /* submatrices of local problem */ 1420 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1421 /* submatrices for benign trick */ 1422 Mat B0_R = NULL, B0_V = NULL; 1423 /* submatrices of local coarse problem */ 1424 Mat S_VV,S_CV,S_VC,S_CC; 1425 /* working matrices */ 1426 Mat C_CR; 1427 /* additional working stuff */ 1428 PC pc_R; 1429 Mat F; 1430 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1431 1432 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1433 PetscScalar *work; 1434 PetscInt *idx_V_B; 1435 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1436 PetscInt i,n_R,n_D,n_B; 1437 1438 /* some shortcuts to scalars */ 1439 PetscScalar one=1.0,m_one=-1.0; 1440 1441 PetscFunctionBegin; 1442 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1444 } 1445 1446 /* Set Non-overlapping dimensions */ 1447 n_vertices = pcbddc->n_vertices; 1448 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1449 n_B = pcis->n_B; 1450 n_D = pcis->n - n_B; 1451 n_R = pcis->n - n_vertices; 1452 1453 /* vertices in boundary numbering */ 1454 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1456 if (i != n_vertices) { 1457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1458 } 1459 1460 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1461 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1463 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1465 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1467 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 1471 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1476 lda_rhs = n_R; 1477 need_benign_correction = PETSC_FALSE; 1478 if (isLU || isILU || isCHOL) { 1479 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1480 } else if (sub_schurs->reuse_solver) { 1481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1482 MatFactorType type; 1483 1484 F = reuse_solver->F; 1485 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1486 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1487 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1488 need_benign_correction = !!reuse_solver->benign_n; 1489 } else { 1490 F = NULL; 1491 } 1492 1493 /* allocate workspace */ 1494 n = 0; 1495 if (n_constraints) { 1496 n += lda_rhs*n_constraints; 1497 } 1498 if (n_vertices) { 1499 n = PetscMax(2*lda_rhs*n_vertices,n); 1500 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1501 } 1502 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1503 1504 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1505 if (n_constraints) { 1506 Mat M1,M2,M3; 1507 Mat auxmat; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PetscScalar *marr; 1540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1541 1542 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1543 for (i=0;i<n_constraints;i++) { 1544 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1545 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1549 } 1550 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1551 if (need_benign_correction) { 1552 PetscScalar *marr; 1553 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1554 1555 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 for (i=0;i<n_constraints;i++) { 1557 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1558 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 } 1563 ierr = MatDestroy(&B);CHKERRQ(ierr); 1564 } else { 1565 PetscScalar *marr; 1566 1567 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 for (i=0;i<n_constraints;i++) { 1569 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1574 } 1575 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->switch_static) { 1578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1581 for (i=0;i<n_constraints;i++) { 1582 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1583 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1588 } 1589 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1591 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1592 } else { 1593 if (lda_rhs != n_R) { 1594 IS dummy; 1595 1596 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1601 pcbddc->local_auxmat2 = local_auxmat2_R; 1602 } 1603 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1604 } 1605 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1606 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1607 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1608 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1609 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1610 if (isCHOL) { 1611 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1612 } else { 1613 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1614 } 1615 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1616 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1618 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1619 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1620 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1621 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1622 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1623 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1624 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1625 } 1626 1627 /* Extract B0_R */ 1628 if (pcbddc->benign_n) { 1629 IS dummy; 1630 PetscReal norm; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1634 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1635 if (norm < PETSC_SMALL) { 1636 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1637 } 1638 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1639 } 1640 1641 /* Get submatrices from subdomain matrix */ 1642 if (n_vertices) { 1643 IS is_aux; 1644 1645 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1646 IS tis; 1647 1648 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1649 ierr = ISSort(tis);CHKERRQ(ierr); 1650 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 } else { 1653 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1654 } 1655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1656 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1658 if (pcbddc->benign_n) { 1659 IS dummy; 1660 1661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1663 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1664 } 1665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1666 } 1667 1668 /* Matrix of coarse basis functions (local) */ 1669 if (pcbddc->coarse_phi_B) { 1670 PetscInt on_B,on_primal,on_D=n_D; 1671 if (pcbddc->coarse_phi_D) { 1672 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1673 } 1674 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1675 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1676 PetscScalar *marray; 1677 1678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1679 ierr = PetscFree(marray);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1681 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1684 } 1685 } 1686 1687 if (!pcbddc->coarse_phi_B) { 1688 PetscScalar *marray; 1689 1690 n = n_B*pcbddc->local_primal_size; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n += n_D*pcbddc->local_primal_size; 1693 } 1694 if (!pcbddc->symmetric_primal) { 1695 n *= 2; 1696 } 1697 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1702 n += n_D*pcbddc->local_primal_size; 1703 } 1704 if (!pcbddc->symmetric_primal) { 1705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 n = n_B*pcbddc->local_primal_size; 1708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 } 1710 } else { 1711 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1714 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1715 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1716 } 1717 } 1718 } 1719 1720 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1721 p0_lidx_I = NULL; 1722 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1723 const PetscInt *idxs; 1724 1725 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1727 for (i=0;i<pcbddc->benign_n;i++) { 1728 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1729 } 1730 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1731 } 1732 1733 /* vertices */ 1734 if (n_vertices) { 1735 1736 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1737 1738 if (n_R) { 1739 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1740 PetscBLASInt B_N,B_one = 1; 1741 PetscScalar *x,*y; 1742 PetscBool isseqaij; 1743 1744 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1745 if (need_benign_correction) { 1746 ISLocalToGlobalMapping RtoN; 1747 IS is_p0; 1748 PetscInt *idxs_p0,n; 1749 1750 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1751 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1752 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1753 if (n != pcbddc->benign_n) { 1754 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1755 } 1756 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1757 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1758 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1759 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1760 } 1761 1762 if (lda_rhs == n_R) { 1763 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1764 } else { 1765 PetscScalar *av,*array; 1766 const PetscInt *xadj,*adjncy; 1767 PetscInt n; 1768 PetscBool flg_row; 1769 1770 array = work+lda_rhs*n_vertices; 1771 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1772 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1774 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1775 for (i=0;i<n;i++) { 1776 PetscInt j; 1777 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1778 } 1779 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1782 } 1783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1784 if (F) { 1785 if (need_benign_correction) { 1786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1787 PetscScalar *marr; 1788 1789 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1790 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1791 1792 | 0 0 0 | (V) 1793 L = | 0 0 -1 | (P-p0) 1794 | 0 0 -1 | (p0) 1795 1796 */ 1797 for (i=0;i<reuse_solver->benign_n;i++) { 1798 const PetscScalar *vals; 1799 const PetscInt *idxs,*idxs_zero; 1800 PetscInt n,j,nz; 1801 1802 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1804 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1805 for (j=0;j<n;j++) { 1806 PetscScalar val = vals[j]; 1807 PetscInt k,col = idxs[j]; 1808 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1809 } 1810 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1812 } 1813 /* need to correct the rhs */ 1814 for (i=0;i<n_vertices;i++) { 1815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1816 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1818 } 1819 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1820 } 1821 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1822 /* need to correct the solution */ 1823 if (need_benign_correction) { 1824 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1825 PetscScalar *marr; 1826 1827 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1828 for (i=0;i<n_vertices;i++) { 1829 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1830 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 } 1835 } else { 1836 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1840 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1842 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1843 } 1844 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1845 } 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 /* S_VV and S_CV */ 1848 if (n_constraints) { 1849 Mat B; 1850 1851 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1852 for (i=0;i<n_vertices;i++) { 1853 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1854 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1859 } 1860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1861 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1862 ierr = MatDestroy(&B);CHKERRQ(ierr); 1863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1864 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1865 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 } 1870 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1871 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1872 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1873 } 1874 if (lda_rhs != n_R) { 1875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1878 } 1879 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1880 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr,*sums; 1884 1885 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1886 ierr = MatDenseGetArray(S_VVt,&marr); 1887 for (i=0;i<reuse_solver->benign_n;i++) { 1888 const PetscScalar *vals; 1889 const PetscInt *idxs,*idxs_zero; 1890 PetscInt n,j,nz; 1891 1892 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1893 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1894 for (j=0;j<n_vertices;j++) { 1895 PetscInt k; 1896 sums[j] = 0.; 1897 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1898 } 1899 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1900 for (j=0;j<n;j++) { 1901 PetscScalar val = vals[j]; 1902 PetscInt k; 1903 for (k=0;k<n_vertices;k++) { 1904 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1905 } 1906 } 1907 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1908 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1909 } 1910 ierr = PetscFree(sums);CHKERRQ(ierr); 1911 ierr = MatDenseRestoreArray(S_VVt,&marr); 1912 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1913 } 1914 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1919 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1921 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1922 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1925 } 1926 for (i=0;i<pcbddc->benign_n;i++) { 1927 const PetscScalar *vals; 1928 const PetscInt *idxs; 1929 PetscInt n,j,primal_idx; 1930 1931 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1932 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1933 for (j=0;j<n;j++) { 1934 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1935 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1936 } 1937 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 } 1939 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1940 1941 /* coarse basis functions */ 1942 for (i=0;i<n_vertices;i++) { 1943 PetscScalar *y; 1944 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 y[n_B*i+idx_V_B[i]] = 1.0; 1951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1952 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1953 1954 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1955 PetscInt j; 1956 1957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1962 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 } 1965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1966 } 1967 /* if n_R == 0 the object is not destroyed */ 1968 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1969 } 1970 1971 if (n_constraints) { 1972 Mat B; 1973 1974 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1975 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1976 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1977 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1978 if (n_vertices) { 1979 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1980 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1981 } else { 1982 Mat S_VCt; 1983 1984 if (lda_rhs != n_R) { 1985 ierr = MatDestroy(&B);CHKERRQ(ierr); 1986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1988 } 1989 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1992 } 1993 } 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<n_constraints;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2006 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2007 PetscInt j; 2008 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2014 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 } 2017 if (B0_R) { 2018 Vec benign_proj; 2019 const PetscScalar *array; 2020 PetscInt j; 2021 2022 ierr = MatCreateVecs(B0_R,NULL,&benign_proj);CHKERRQ(ierr); 2023 ierr = MatMult(B0_R,pcbddc->vec1_R,benign_proj);CHKERRQ(ierr); 2024 ierr = VecGetArrayRead(benign_proj,&array);CHKERRQ(ierr); 2025 for (j=0;j<pcbddc->benign_n;j++) { 2026 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2027 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i+n_vertices] = array[j]; 2028 coarse_submat_vals[(i+n_vertices)*pcbddc->local_primal_size+primal_idx] = array[j]; 2029 } 2030 ierr = VecRestoreArrayRead(benign_proj,&array);CHKERRQ(ierr); 2031 ierr = VecDestroy(&benign_proj);CHKERRQ(ierr); 2032 } 2033 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2034 } 2035 } 2036 if (n_constraints) { 2037 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2038 } 2039 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2040 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2041 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 2042 2043 /* compute other basis functions for non-symmetric problems */ 2044 if (!pcbddc->symmetric_primal) { 2045 Mat B_V=NULL,B_C=NULL; 2046 PetscScalar *marray; 2047 2048 if (n_constraints) { 2049 Mat S_CCT,C_CRT; 2050 2051 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2052 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2053 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2054 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2055 if (n_vertices) { 2056 Mat S_VCT; 2057 2058 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2059 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2060 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2061 } 2062 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2063 } 2064 if (n_vertices && n_R) { 2065 PetscScalar *av,*marray; 2066 const PetscInt *xadj,*adjncy; 2067 PetscInt n; 2068 PetscBool flg_row; 2069 2070 /* B_V = B_V - A_VR^T */ 2071 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2072 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2073 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2074 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2075 for (i=0;i<n;i++) { 2076 PetscInt j; 2077 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2078 } 2079 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2080 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2081 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2082 } 2083 2084 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2085 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2086 for (i=0;i<n_vertices;i++) { 2087 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2088 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2089 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2091 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2092 } 2093 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2094 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2095 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2096 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2097 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2098 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2100 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2101 } 2102 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2103 /* coarse basis functions */ 2104 for (i=0;i<pcbddc->local_primal_size;i++) { 2105 PetscScalar *y; 2106 2107 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2108 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2109 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2110 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 if (i<n_vertices) { 2113 y[n_B*i+idx_V_B[i]] = 1.0; 2114 } 2115 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2116 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2117 2118 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2119 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2120 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2121 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2123 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2124 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2125 } 2126 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2127 } 2128 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2129 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2130 } 2131 /* free memory */ 2132 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2133 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2134 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2135 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2136 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2137 ierr = PetscFree(work);CHKERRQ(ierr); 2138 if (n_vertices) { 2139 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2140 } 2141 if (n_constraints) { 2142 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2143 } 2144 /* Checking coarse_sub_mat and coarse basis functios */ 2145 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2146 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2147 if (pcbddc->dbg_flag) { 2148 Mat coarse_sub_mat; 2149 Mat AUXMAT,TM1,TM2,TM3,TM4; 2150 Mat coarse_phi_D,coarse_phi_B; 2151 Mat coarse_psi_D,coarse_psi_B; 2152 Mat A_II,A_BB,A_IB,A_BI; 2153 Mat C_B,CPHI; 2154 IS is_dummy; 2155 Vec mones; 2156 MatType checkmattype=MATSEQAIJ; 2157 PetscReal real_value; 2158 2159 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2160 Mat A; 2161 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2162 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2163 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2164 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2165 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2166 ierr = MatDestroy(&A);CHKERRQ(ierr); 2167 } else { 2168 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2169 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2170 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2171 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2172 } 2173 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2174 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2175 if (!pcbddc->symmetric_primal) { 2176 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2177 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2178 } 2179 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2180 2181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2184 if (!pcbddc->symmetric_primal) { 2185 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2186 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2187 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2188 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2189 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2190 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2191 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2192 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2193 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2194 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2195 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2196 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2199 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2200 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2201 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2202 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2203 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2204 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2205 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2206 } 2207 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2208 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2209 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2210 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2211 if (pcbddc->benign_n) { 2212 Mat B0_B,B0_BPHI; 2213 PetscScalar *data,*data2; 2214 PetscInt j; 2215 2216 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2217 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2218 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2219 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2220 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2221 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2222 for (j=0;j<pcbddc->benign_n;j++) { 2223 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2224 for (i=0;i<pcbddc->local_primal_size;i++) { 2225 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2226 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2227 } 2228 } 2229 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2230 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2231 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2232 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2234 } 2235 #if 0 2236 { 2237 PetscViewer viewer; 2238 char filename[256]; 2239 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2240 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2241 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2242 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2243 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2244 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2245 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2247 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2248 if (pcbddc->coarse_phi_D) { 2249 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2250 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2251 } 2252 if (pcbddc->coarse_psi_B) { 2253 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2254 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2255 } 2256 if (pcbddc->coarse_phi_B) { 2257 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2258 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2259 } 2260 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2261 } 2262 #endif 2263 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2264 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2267 2268 /* check constraints */ 2269 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2270 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2271 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2272 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2273 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2274 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2275 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2276 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2278 if (!pcbddc->symmetric_primal) { 2279 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2280 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2281 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2282 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2284 } 2285 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2286 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2287 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2288 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2289 } 2290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2291 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2292 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2293 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2294 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2295 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2296 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2297 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2298 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2299 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2300 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2301 if (!pcbddc->symmetric_primal) { 2302 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2303 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2304 } 2305 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2306 } 2307 /* get back data */ 2308 *coarse_submat_vals_n = coarse_submat_vals; 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2314 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2315 { 2316 Mat *work_mat; 2317 IS isrow_s,iscol_s; 2318 PetscBool rsorted,csorted; 2319 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2324 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2325 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2326 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2327 2328 if (!rsorted) { 2329 const PetscInt *idxs; 2330 PetscInt *idxs_sorted,i; 2331 2332 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2333 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2334 for (i=0;i<rsize;i++) { 2335 idxs_perm_r[i] = i; 2336 } 2337 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2338 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2339 for (i=0;i<rsize;i++) { 2340 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2341 } 2342 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2343 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2344 } else { 2345 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2346 isrow_s = isrow; 2347 } 2348 2349 if (!csorted) { 2350 if (isrow == iscol) { 2351 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2352 iscol_s = isrow_s; 2353 } else { 2354 const PetscInt *idxs; 2355 PetscInt *idxs_sorted,i; 2356 2357 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2358 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2359 for (i=0;i<csize;i++) { 2360 idxs_perm_c[i] = i; 2361 } 2362 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2363 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2364 for (i=0;i<csize;i++) { 2365 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2366 } 2367 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2368 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2369 } 2370 } else { 2371 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2372 iscol_s = iscol; 2373 } 2374 2375 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2376 2377 if (!rsorted || !csorted) { 2378 Mat new_mat; 2379 IS is_perm_r,is_perm_c; 2380 2381 if (!rsorted) { 2382 PetscInt *idxs_r,i; 2383 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2384 for (i=0;i<rsize;i++) { 2385 idxs_r[idxs_perm_r[i]] = i; 2386 } 2387 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2388 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2389 } else { 2390 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2391 } 2392 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2393 2394 if (!csorted) { 2395 if (isrow_s == iscol_s) { 2396 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2397 is_perm_c = is_perm_r; 2398 } else { 2399 PetscInt *idxs_c,i; 2400 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2401 for (i=0;i<csize;i++) { 2402 idxs_c[idxs_perm_c[i]] = i; 2403 } 2404 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2405 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2406 } 2407 } else { 2408 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2409 } 2410 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2411 2412 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2413 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2414 work_mat[0] = new_mat; 2415 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2416 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2417 } 2418 2419 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2420 *B = work_mat[0]; 2421 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2422 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2423 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 #undef __FUNCT__ 2428 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2429 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2430 { 2431 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2432 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2433 Mat new_mat; 2434 IS is_local,is_global; 2435 PetscInt local_size; 2436 PetscBool isseqaij; 2437 PetscErrorCode ierr; 2438 2439 PetscFunctionBegin; 2440 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2441 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2442 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2443 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2444 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2445 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2446 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2447 2448 /* check */ 2449 if (pcbddc->dbg_flag) { 2450 Vec x,x_change; 2451 PetscReal error; 2452 2453 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2454 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2455 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2456 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2457 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2458 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2459 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2460 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2461 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2462 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2464 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2465 ierr = VecDestroy(&x);CHKERRQ(ierr); 2466 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2467 } 2468 2469 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2470 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2471 if (isseqaij) { 2472 Mat M; 2473 2474 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2475 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2476 ierr = MatDestroy(&M);CHKERRQ(ierr); 2477 } else { 2478 Mat work_mat,M; 2479 2480 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2481 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2482 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2483 ierr = MatDestroy(&M);CHKERRQ(ierr); 2484 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2485 } 2486 if (matis->A->symmetric_set) { 2487 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2488 #if !defined(PETSC_USE_COMPLEX) 2489 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2490 #endif 2491 } 2492 /* 2493 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2494 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2495 */ 2496 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2502 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2503 { 2504 PC_IS* pcis = (PC_IS*)(pc->data); 2505 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2506 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2507 PetscInt *idx_R_local=NULL; 2508 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2509 PetscInt vbs,bs; 2510 PetscBT bitmask=NULL; 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 /* 2515 No need to setup local scatters if 2516 - primal space is unchanged 2517 AND 2518 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2519 AND 2520 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2521 */ 2522 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2523 PetscFunctionReturn(0); 2524 } 2525 /* destroy old objects */ 2526 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2527 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2528 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2529 /* Set Non-overlapping dimensions */ 2530 n_B = pcis->n_B; 2531 n_D = pcis->n - n_B; 2532 n_vertices = pcbddc->n_vertices; 2533 2534 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2535 2536 /* create auxiliary bitmask and allocate workspace */ 2537 if (!sub_schurs->reuse_solver) { 2538 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2539 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2540 for (i=0;i<n_vertices;i++) { 2541 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2542 } 2543 2544 for (i=0, n_R=0; i<pcis->n; i++) { 2545 if (!PetscBTLookup(bitmask,i)) { 2546 idx_R_local[n_R++] = i; 2547 } 2548 } 2549 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2550 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2551 2552 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2553 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2554 } 2555 2556 /* Block code */ 2557 vbs = 1; 2558 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2559 if (bs>1 && !(n_vertices%bs)) { 2560 PetscBool is_blocked = PETSC_TRUE; 2561 PetscInt *vary; 2562 if (!sub_schurs->reuse_solver) { 2563 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2564 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2565 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2566 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2567 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2568 for (i=0; i<pcis->n/bs; i++) { 2569 if (vary[i]!=0 && vary[i]!=bs) { 2570 is_blocked = PETSC_FALSE; 2571 break; 2572 } 2573 } 2574 ierr = PetscFree(vary);CHKERRQ(ierr); 2575 } else { 2576 /* Verify directly the R set */ 2577 for (i=0; i<n_R/bs; i++) { 2578 PetscInt j,node=idx_R_local[bs*i]; 2579 for (j=1; j<bs; j++) { 2580 if (node != idx_R_local[bs*i+j]-j) { 2581 is_blocked = PETSC_FALSE; 2582 break; 2583 } 2584 } 2585 } 2586 } 2587 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2588 vbs = bs; 2589 for (i=0;i<n_R/vbs;i++) { 2590 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2591 } 2592 } 2593 } 2594 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2595 if (sub_schurs->reuse_solver) { 2596 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2597 2598 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2599 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2600 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2601 reuse_solver->is_R = pcbddc->is_R_local; 2602 } else { 2603 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2604 } 2605 2606 /* print some info if requested */ 2607 if (pcbddc->dbg_flag) { 2608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2610 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2615 } 2616 2617 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2618 if (!sub_schurs->reuse_solver) { 2619 IS is_aux1,is_aux2; 2620 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2621 2622 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2623 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2624 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2625 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2626 for (i=0; i<n_D; i++) { 2627 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2628 } 2629 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2630 for (i=0, j=0; i<n_R; i++) { 2631 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2632 aux_array1[j++] = i; 2633 } 2634 } 2635 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2636 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 for (i=0, j=0; i<n_B; i++) { 2638 if (!PetscBTLookup(bitmask,is_indices[i])) { 2639 aux_array2[j++] = i; 2640 } 2641 } 2642 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2644 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2646 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2647 2648 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2649 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2650 for (i=0, j=0; i<n_R; i++) { 2651 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2652 aux_array1[j++] = i; 2653 } 2654 } 2655 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2657 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2658 } 2659 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2660 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2661 } else { 2662 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2663 IS tis; 2664 PetscInt schur_size; 2665 2666 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2667 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2668 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2669 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2670 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2671 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2672 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2673 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2674 } 2675 } 2676 PetscFunctionReturn(0); 2677 } 2678 2679 2680 #undef __FUNCT__ 2681 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2682 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2683 { 2684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2685 PC_IS *pcis = (PC_IS*)pc->data; 2686 PC pc_temp; 2687 Mat A_RR; 2688 MatReuse reuse; 2689 PetscScalar m_one = -1.0; 2690 PetscReal value; 2691 PetscInt n_D,n_R; 2692 PetscBool use_exact,use_exact_reduced,issbaij; 2693 PetscErrorCode ierr; 2694 /* prefixes stuff */ 2695 char dir_prefix[256],neu_prefix[256],str_level[16]; 2696 size_t len; 2697 2698 PetscFunctionBegin; 2699 2700 /* compute prefixes */ 2701 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2702 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2703 if (!pcbddc->current_level) { 2704 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2705 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2706 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2707 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2708 } else { 2709 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2710 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2711 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2712 len -= 15; /* remove "pc_bddc_coarse_" */ 2713 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2714 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2715 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2716 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2717 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2718 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2719 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2720 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2721 } 2722 2723 /* DIRICHLET PROBLEM */ 2724 if (dirichlet) { 2725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2726 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2727 if (!sub_schurs->reuse_solver) { 2728 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2729 } 2730 if (pcbddc->dbg_flag) { 2731 Mat A_IIn; 2732 2733 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2734 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2735 pcis->A_II = A_IIn; 2736 } 2737 } 2738 if (pcbddc->local_mat->symmetric_set) { 2739 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2740 } 2741 /* Matrix for Dirichlet problem is pcis->A_II */ 2742 n_D = pcis->n - pcis->n_B; 2743 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2744 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2745 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2746 /* default */ 2747 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2748 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2749 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2750 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2751 if (issbaij) { 2752 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2753 } else { 2754 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2755 } 2756 /* Allow user's customization */ 2757 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2758 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2759 } 2760 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2761 if (sub_schurs->reuse_solver) { 2762 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2763 2764 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2765 } 2766 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2767 if (!n_D) { 2768 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2769 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2770 } 2771 /* Set Up KSP for Dirichlet problem of BDDC */ 2772 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2773 /* set ksp_D into pcis data */ 2774 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2775 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2776 pcis->ksp_D = pcbddc->ksp_D; 2777 } 2778 2779 /* NEUMANN PROBLEM */ 2780 A_RR = 0; 2781 if (neumann) { 2782 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2783 PetscInt ibs,mbs; 2784 PetscBool issbaij; 2785 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2786 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2787 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2788 if (pcbddc->ksp_R) { /* already created ksp */ 2789 PetscInt nn_R; 2790 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2791 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2792 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2793 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2794 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2795 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2796 reuse = MAT_INITIAL_MATRIX; 2797 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2798 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2799 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2800 reuse = MAT_INITIAL_MATRIX; 2801 } else { /* safe to reuse the matrix */ 2802 reuse = MAT_REUSE_MATRIX; 2803 } 2804 } 2805 /* last check */ 2806 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2807 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2808 reuse = MAT_INITIAL_MATRIX; 2809 } 2810 } else { /* first time, so we need to create the matrix */ 2811 reuse = MAT_INITIAL_MATRIX; 2812 } 2813 /* extract A_RR */ 2814 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2815 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2816 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2817 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2818 if (matis->A == pcbddc->local_mat) { 2819 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2820 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2821 } else { 2822 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2823 } 2824 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2825 if (matis->A == pcbddc->local_mat) { 2826 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2827 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2828 } else { 2829 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2830 } 2831 } 2832 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2833 if (pcbddc->local_mat->symmetric_set) { 2834 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2835 } 2836 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2837 Mat A_RRn; 2838 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RRn);CHKERRQ(ierr); 2839 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2840 A_RR = A_RRn; 2841 } 2842 if (!pcbddc->ksp_R) { /* create object if not present */ 2843 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2844 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2845 /* default */ 2846 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2847 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2848 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2849 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2850 if (issbaij) { 2851 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2852 } else { 2853 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2854 } 2855 /* Allow user's customization */ 2856 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2857 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2858 } 2859 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2860 if (!n_R) { 2861 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2862 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2863 } 2864 /* Reuse solver if it is present */ 2865 if (sub_schurs->reuse_solver) { 2866 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2867 2868 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2869 } 2870 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2871 /* Set Up KSP for Neumann problem of BDDC */ 2872 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2873 } 2874 /* free Neumann problem's matrix */ 2875 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2876 2877 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2878 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2879 if (pcbddc->dbg_flag) { 2880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2881 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2882 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2883 } 2884 if (dirichlet) { /* Dirichlet */ 2885 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2886 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2887 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2888 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2889 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2890 /* need to be adapted? */ 2891 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2892 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2893 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2894 /* print info */ 2895 if (pcbddc->dbg_flag) { 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 } 2899 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2900 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2901 } 2902 } 2903 if (neumann) { /* Neumann */ 2904 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2905 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2906 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2907 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2908 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2909 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2910 /* need to be adapted? */ 2911 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2912 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2913 /* print info */ 2914 if (pcbddc->dbg_flag) { 2915 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2917 } 2918 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2919 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2920 } 2921 } 2922 } 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2928 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2929 { 2930 PetscErrorCode ierr; 2931 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2932 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2933 2934 PetscFunctionBegin; 2935 if (!sub_schurs->reuse_solver) { 2936 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2937 } 2938 if (!pcbddc->switch_static) { 2939 if (applytranspose && pcbddc->local_auxmat1) { 2940 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2941 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2942 } 2943 if (!sub_schurs->reuse_solver) { 2944 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2946 } else { 2947 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2948 2949 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2951 } 2952 } else { 2953 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2955 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2956 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2957 if (applytranspose && pcbddc->local_auxmat1) { 2958 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2959 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2960 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2961 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 } 2963 } 2964 if (!sub_schurs->reuse_solver) { 2965 if (applytranspose) { 2966 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2967 } else { 2968 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2969 } 2970 } else { 2971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2972 2973 if (applytranspose) { 2974 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2975 } else { 2976 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2977 } 2978 } 2979 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2980 if (!pcbddc->switch_static) { 2981 if (!sub_schurs->reuse_solver) { 2982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2984 } else { 2985 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2986 2987 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2988 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2989 } 2990 if (!applytranspose && pcbddc->local_auxmat1) { 2991 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2992 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2993 } 2994 } else { 2995 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2996 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2999 if (!applytranspose && pcbddc->local_auxmat1) { 3000 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3001 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3002 } 3003 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 } 3008 PetscFunctionReturn(0); 3009 } 3010 3011 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3014 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3015 { 3016 PetscErrorCode ierr; 3017 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3018 PC_IS* pcis = (PC_IS*) (pc->data); 3019 const PetscScalar zero = 0.0; 3020 3021 PetscFunctionBegin; 3022 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3023 if (!pcbddc->benign_apply_coarse_only) { 3024 if (applytranspose) { 3025 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3026 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3027 } else { 3028 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3029 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3030 } 3031 } else { 3032 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3033 } 3034 3035 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3036 if (pcbddc->benign_n) { 3037 PetscScalar *array; 3038 PetscInt j; 3039 3040 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3041 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3042 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3043 } 3044 3045 /* start communications from local primal nodes to rhs of coarse solver */ 3046 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3047 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3048 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3049 3050 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3051 /* TODO remove null space when doing multilevel */ 3052 if (pcbddc->coarse_ksp) { 3053 Mat coarse_mat; 3054 Vec rhs,sol; 3055 MatNullSpace nullsp; 3056 3057 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3058 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3059 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3060 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3061 if (nullsp) { 3062 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3063 } 3064 if (applytranspose) { 3065 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3066 } else { 3067 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3068 } 3069 if (nullsp) { 3070 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3071 } 3072 } 3073 3074 /* Local solution on R nodes */ 3075 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3076 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3077 } 3078 /* communications from coarse sol to local primal nodes */ 3079 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3080 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3081 3082 /* Sum contributions from the two levels */ 3083 if (!pcbddc->benign_apply_coarse_only) { 3084 if (applytranspose) { 3085 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3086 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3087 } else { 3088 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3089 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3090 } 3091 /* store p0 */ 3092 if (pcbddc->benign_n) { 3093 PetscScalar *array; 3094 PetscInt j; 3095 3096 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3097 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3098 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3099 } 3100 } else { /* expand the coarse solution */ 3101 if (applytranspose) { 3102 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3103 } else { 3104 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3105 } 3106 } 3107 PetscFunctionReturn(0); 3108 } 3109 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3112 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3113 { 3114 PetscErrorCode ierr; 3115 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3116 PetscScalar *array; 3117 Vec from,to; 3118 3119 PetscFunctionBegin; 3120 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3121 from = pcbddc->coarse_vec; 3122 to = pcbddc->vec1_P; 3123 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3124 Vec tvec; 3125 3126 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3127 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3128 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3129 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3130 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3131 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3132 } 3133 } else { /* from local to global -> put data in coarse right hand side */ 3134 from = pcbddc->vec1_P; 3135 to = pcbddc->coarse_vec; 3136 } 3137 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3138 PetscFunctionReturn(0); 3139 } 3140 3141 #undef __FUNCT__ 3142 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3143 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3144 { 3145 PetscErrorCode ierr; 3146 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3147 PetscScalar *array; 3148 Vec from,to; 3149 3150 PetscFunctionBegin; 3151 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3152 from = pcbddc->coarse_vec; 3153 to = pcbddc->vec1_P; 3154 } else { /* from local to global -> put data in coarse right hand side */ 3155 from = pcbddc->vec1_P; 3156 to = pcbddc->coarse_vec; 3157 } 3158 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3159 if (smode == SCATTER_FORWARD) { 3160 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3161 Vec tvec; 3162 3163 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3164 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3165 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3166 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3167 } 3168 } else { 3169 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3170 ierr = VecResetArray(from);CHKERRQ(ierr); 3171 } 3172 } 3173 PetscFunctionReturn(0); 3174 } 3175 3176 /* uncomment for testing purposes */ 3177 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3180 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3181 { 3182 PetscErrorCode ierr; 3183 PC_IS* pcis = (PC_IS*)(pc->data); 3184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3185 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3186 /* one and zero */ 3187 PetscScalar one=1.0,zero=0.0; 3188 /* space to store constraints and their local indices */ 3189 PetscScalar *constraints_data; 3190 PetscInt *constraints_idxs,*constraints_idxs_B; 3191 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3192 PetscInt *constraints_n; 3193 /* iterators */ 3194 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3195 /* BLAS integers */ 3196 PetscBLASInt lwork,lierr; 3197 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3198 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3199 /* reuse */ 3200 PetscInt olocal_primal_size,olocal_primal_size_cc; 3201 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3202 /* change of basis */ 3203 PetscBool qr_needed; 3204 PetscBT change_basis,qr_needed_idx; 3205 /* auxiliary stuff */ 3206 PetscInt *nnz,*is_indices; 3207 PetscInt ncc; 3208 /* some quantities */ 3209 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3210 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3211 3212 PetscFunctionBegin; 3213 /* Destroy Mat objects computed previously */ 3214 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3215 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3216 /* save info on constraints from previous setup (if any) */ 3217 olocal_primal_size = pcbddc->local_primal_size; 3218 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3219 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3220 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3221 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3222 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3223 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3224 3225 /* print some info */ 3226 if (pcbddc->dbg_flag) { 3227 IS vertices; 3228 PetscInt nv,nedges,nfaces; 3229 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3230 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3231 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3232 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3236 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3237 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3238 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3239 } 3240 3241 if (!pcbddc->adaptive_selection) { 3242 IS ISForVertices,*ISForFaces,*ISForEdges; 3243 MatNullSpace nearnullsp; 3244 const Vec *nearnullvecs; 3245 Vec *localnearnullsp; 3246 PetscScalar *array; 3247 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3248 PetscBool nnsp_has_cnst; 3249 /* LAPACK working arrays for SVD or POD */ 3250 PetscBool skip_lapack,boolforchange; 3251 PetscScalar *work; 3252 PetscReal *singular_vals; 3253 #if defined(PETSC_USE_COMPLEX) 3254 PetscReal *rwork; 3255 #endif 3256 #if defined(PETSC_MISSING_LAPACK_GESVD) 3257 PetscScalar *temp_basis,*correlation_mat; 3258 #else 3259 PetscBLASInt dummy_int=1; 3260 PetscScalar dummy_scalar=1.; 3261 #endif 3262 3263 /* Get index sets for faces, edges and vertices from graph */ 3264 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3265 /* free unneeded index sets */ 3266 if (!pcbddc->use_vertices) { 3267 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3268 } 3269 if (!pcbddc->use_edges) { 3270 for (i=0;i<n_ISForEdges;i++) { 3271 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3272 } 3273 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3274 n_ISForEdges = 0; 3275 } 3276 if (!pcbddc->use_faces) { 3277 for (i=0;i<n_ISForFaces;i++) { 3278 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3279 } 3280 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3281 n_ISForFaces = 0; 3282 } 3283 3284 #if defined(PETSC_USE_DEBUG) 3285 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3286 Also use_change_of_basis should be consistent among processors */ 3287 if (pcbddc->NullSpace) { 3288 PetscBool tbool[2],gbool[2]; 3289 3290 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3291 pcbddc->use_change_of_basis = PETSC_TRUE; 3292 if (!ISForEdges) { 3293 pcbddc->use_change_on_faces = PETSC_TRUE; 3294 } 3295 } 3296 tbool[0] = pcbddc->use_change_of_basis; 3297 tbool[1] = pcbddc->use_change_on_faces; 3298 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3299 pcbddc->use_change_of_basis = gbool[0]; 3300 pcbddc->use_change_on_faces = gbool[1]; 3301 } 3302 #endif 3303 3304 /* check if near null space is attached to global mat */ 3305 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3306 if (nearnullsp) { 3307 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3308 /* remove any stored info */ 3309 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3310 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3311 /* store information for BDDC solver reuse */ 3312 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3313 pcbddc->onearnullspace = nearnullsp; 3314 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3315 for (i=0;i<nnsp_size;i++) { 3316 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3317 } 3318 } else { /* if near null space is not provided BDDC uses constants by default */ 3319 nnsp_size = 0; 3320 nnsp_has_cnst = PETSC_TRUE; 3321 } 3322 /* get max number of constraints on a single cc */ 3323 max_constraints = nnsp_size; 3324 if (nnsp_has_cnst) max_constraints++; 3325 3326 /* 3327 Evaluate maximum storage size needed by the procedure 3328 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3329 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3330 There can be multiple constraints per connected component 3331 */ 3332 n_vertices = 0; 3333 if (ISForVertices) { 3334 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3335 } 3336 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3337 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3338 3339 total_counts = n_ISForFaces+n_ISForEdges; 3340 total_counts *= max_constraints; 3341 total_counts += n_vertices; 3342 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3343 3344 total_counts = 0; 3345 max_size_of_constraint = 0; 3346 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3347 IS used_is; 3348 if (i<n_ISForEdges) { 3349 used_is = ISForEdges[i]; 3350 } else { 3351 used_is = ISForFaces[i-n_ISForEdges]; 3352 } 3353 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3354 total_counts += j; 3355 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3356 } 3357 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3358 3359 /* get local part of global near null space vectors */ 3360 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3361 for (k=0;k<nnsp_size;k++) { 3362 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3363 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3364 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3365 } 3366 3367 /* whether or not to skip lapack calls */ 3368 skip_lapack = PETSC_TRUE; 3369 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3370 3371 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3372 if (!skip_lapack) { 3373 PetscScalar temp_work; 3374 3375 #if defined(PETSC_MISSING_LAPACK_GESVD) 3376 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3377 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3378 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3379 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3380 #if defined(PETSC_USE_COMPLEX) 3381 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3382 #endif 3383 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3384 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3385 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3386 lwork = -1; 3387 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3388 #if !defined(PETSC_USE_COMPLEX) 3389 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3390 #else 3391 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3392 #endif 3393 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3394 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3395 #else /* on missing GESVD */ 3396 /* SVD */ 3397 PetscInt max_n,min_n; 3398 max_n = max_size_of_constraint; 3399 min_n = max_constraints; 3400 if (max_size_of_constraint < max_constraints) { 3401 min_n = max_size_of_constraint; 3402 max_n = max_constraints; 3403 } 3404 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3405 #if defined(PETSC_USE_COMPLEX) 3406 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3407 #endif 3408 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3409 lwork = -1; 3410 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3411 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3412 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3413 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3414 #if !defined(PETSC_USE_COMPLEX) 3415 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3416 #else 3417 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3418 #endif 3419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3421 #endif /* on missing GESVD */ 3422 /* Allocate optimal workspace */ 3423 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3424 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3425 } 3426 /* Now we can loop on constraining sets */ 3427 total_counts = 0; 3428 constraints_idxs_ptr[0] = 0; 3429 constraints_data_ptr[0] = 0; 3430 /* vertices */ 3431 if (n_vertices) { 3432 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3433 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3434 for (i=0;i<n_vertices;i++) { 3435 constraints_n[total_counts] = 1; 3436 constraints_data[total_counts] = 1.0; 3437 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3438 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3439 total_counts++; 3440 } 3441 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3442 n_vertices = total_counts; 3443 } 3444 3445 /* edges and faces */ 3446 total_counts_cc = total_counts; 3447 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3448 IS used_is; 3449 PetscBool idxs_copied = PETSC_FALSE; 3450 3451 if (ncc<n_ISForEdges) { 3452 used_is = ISForEdges[ncc]; 3453 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3454 } else { 3455 used_is = ISForFaces[ncc-n_ISForEdges]; 3456 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3457 } 3458 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3459 3460 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3461 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3462 /* change of basis should not be performed on local periodic nodes */ 3463 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3464 if (nnsp_has_cnst) { 3465 PetscScalar quad_value; 3466 3467 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3468 idxs_copied = PETSC_TRUE; 3469 3470 if (!pcbddc->use_nnsp_true) { 3471 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3472 } else { 3473 quad_value = 1.0; 3474 } 3475 for (j=0;j<size_of_constraint;j++) { 3476 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3477 } 3478 temp_constraints++; 3479 total_counts++; 3480 } 3481 for (k=0;k<nnsp_size;k++) { 3482 PetscReal real_value; 3483 PetscScalar *ptr_to_data; 3484 3485 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3486 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3487 for (j=0;j<size_of_constraint;j++) { 3488 ptr_to_data[j] = array[is_indices[j]]; 3489 } 3490 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3491 /* check if array is null on the connected component */ 3492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3493 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3494 if (real_value > 0.0) { /* keep indices and values */ 3495 temp_constraints++; 3496 total_counts++; 3497 if (!idxs_copied) { 3498 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3499 idxs_copied = PETSC_TRUE; 3500 } 3501 } 3502 } 3503 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3504 valid_constraints = temp_constraints; 3505 if (!pcbddc->use_nnsp_true && temp_constraints) { 3506 if (temp_constraints == 1) { /* just normalize the constraint */ 3507 PetscScalar norm,*ptr_to_data; 3508 3509 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3511 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3512 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3513 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3514 } else { /* perform SVD */ 3515 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3516 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3517 3518 #if defined(PETSC_MISSING_LAPACK_GESVD) 3519 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3520 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3521 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3522 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3523 from that computed using LAPACKgesvd 3524 -> This is due to a different computation of eigenvectors in LAPACKheev 3525 -> The quality of the POD-computed basis will be the same */ 3526 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3527 /* Store upper triangular part of correlation matrix */ 3528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3530 for (j=0;j<temp_constraints;j++) { 3531 for (k=0;k<j+1;k++) { 3532 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3533 } 3534 } 3535 /* compute eigenvalues and eigenvectors of correlation matrix */ 3536 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3537 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3538 #if !defined(PETSC_USE_COMPLEX) 3539 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3540 #else 3541 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3542 #endif 3543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3544 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3545 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3546 j = 0; 3547 while (j < temp_constraints && singular_vals[j] < tol) j++; 3548 total_counts = total_counts-j; 3549 valid_constraints = temp_constraints-j; 3550 /* scale and copy POD basis into used quadrature memory */ 3551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3552 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3553 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3555 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3557 if (j<temp_constraints) { 3558 PetscInt ii; 3559 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3560 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3561 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3562 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3563 for (k=0;k<temp_constraints-j;k++) { 3564 for (ii=0;ii<size_of_constraint;ii++) { 3565 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3566 } 3567 } 3568 } 3569 #else /* on missing GESVD */ 3570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3571 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3573 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3574 #if !defined(PETSC_USE_COMPLEX) 3575 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3576 #else 3577 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3578 #endif 3579 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3580 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3581 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3582 k = temp_constraints; 3583 if (k > size_of_constraint) k = size_of_constraint; 3584 j = 0; 3585 while (j < k && singular_vals[k-j-1] < tol) j++; 3586 valid_constraints = k-j; 3587 total_counts = total_counts-temp_constraints+valid_constraints; 3588 #endif /* on missing GESVD */ 3589 } 3590 } 3591 /* update pointers information */ 3592 if (valid_constraints) { 3593 constraints_n[total_counts_cc] = valid_constraints; 3594 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3595 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3596 /* set change_of_basis flag */ 3597 if (boolforchange) { 3598 PetscBTSet(change_basis,total_counts_cc); 3599 } 3600 total_counts_cc++; 3601 } 3602 } 3603 /* free workspace */ 3604 if (!skip_lapack) { 3605 ierr = PetscFree(work);CHKERRQ(ierr); 3606 #if defined(PETSC_USE_COMPLEX) 3607 ierr = PetscFree(rwork);CHKERRQ(ierr); 3608 #endif 3609 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3610 #if defined(PETSC_MISSING_LAPACK_GESVD) 3611 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3612 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3613 #endif 3614 } 3615 for (k=0;k<nnsp_size;k++) { 3616 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3617 } 3618 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3619 /* free index sets of faces, edges and vertices */ 3620 for (i=0;i<n_ISForFaces;i++) { 3621 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3622 } 3623 if (n_ISForFaces) { 3624 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3625 } 3626 for (i=0;i<n_ISForEdges;i++) { 3627 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3628 } 3629 if (n_ISForEdges) { 3630 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3631 } 3632 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3633 } else { 3634 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3635 3636 total_counts = 0; 3637 n_vertices = 0; 3638 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3639 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3640 } 3641 max_constraints = 0; 3642 total_counts_cc = 0; 3643 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3644 total_counts += pcbddc->adaptive_constraints_n[i]; 3645 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3646 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3647 } 3648 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3649 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3650 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3651 constraints_data = pcbddc->adaptive_constraints_data; 3652 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3653 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3654 total_counts_cc = 0; 3655 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3656 if (pcbddc->adaptive_constraints_n[i]) { 3657 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3658 } 3659 } 3660 #if 0 3661 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3662 for (i=0;i<total_counts_cc;i++) { 3663 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3664 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3665 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3666 printf(" %d",constraints_idxs[j]); 3667 } 3668 printf("\n"); 3669 printf("number of cc: %d\n",constraints_n[i]); 3670 } 3671 for (i=0;i<n_vertices;i++) { 3672 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3673 } 3674 for (i=0;i<sub_schurs->n_subs;i++) { 3675 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3676 } 3677 #endif 3678 3679 max_size_of_constraint = 0; 3680 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3681 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3682 /* Change of basis */ 3683 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3684 if (pcbddc->use_change_of_basis) { 3685 for (i=0;i<sub_schurs->n_subs;i++) { 3686 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3687 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3688 } 3689 } 3690 } 3691 } 3692 pcbddc->local_primal_size = total_counts; 3693 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3694 3695 /* map constraints_idxs in boundary numbering */ 3696 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3697 if (i != constraints_idxs_ptr[total_counts_cc]) { 3698 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3699 } 3700 3701 /* Create constraint matrix */ 3702 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3703 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3704 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3705 3706 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3707 /* determine if a QR strategy is needed for change of basis */ 3708 qr_needed = PETSC_FALSE; 3709 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3710 total_primal_vertices=0; 3711 pcbddc->local_primal_size_cc = 0; 3712 for (i=0;i<total_counts_cc;i++) { 3713 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3714 if (size_of_constraint == 1) { 3715 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3716 pcbddc->local_primal_size_cc += 1; 3717 } else if (PetscBTLookup(change_basis,i)) { 3718 for (k=0;k<constraints_n[i];k++) { 3719 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3720 } 3721 pcbddc->local_primal_size_cc += constraints_n[i]; 3722 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3723 PetscBTSet(qr_needed_idx,i); 3724 qr_needed = PETSC_TRUE; 3725 } 3726 } else { 3727 pcbddc->local_primal_size_cc += 1; 3728 } 3729 } 3730 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3731 pcbddc->n_vertices = total_primal_vertices; 3732 /* permute indices in order to have a sorted set of vertices */ 3733 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3734 3735 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3736 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3737 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3738 3739 /* nonzero structure of constraint matrix */ 3740 /* and get reference dof for local constraints */ 3741 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3742 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3743 3744 j = total_primal_vertices; 3745 total_counts = total_primal_vertices; 3746 cum = total_primal_vertices; 3747 for (i=n_vertices;i<total_counts_cc;i++) { 3748 if (!PetscBTLookup(change_basis,i)) { 3749 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3750 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3751 cum++; 3752 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3753 for (k=0;k<constraints_n[i];k++) { 3754 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3755 nnz[j+k] = size_of_constraint; 3756 } 3757 j += constraints_n[i]; 3758 } 3759 } 3760 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3761 ierr = PetscFree(nnz);CHKERRQ(ierr); 3762 3763 /* set values in constraint matrix */ 3764 for (i=0;i<total_primal_vertices;i++) { 3765 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3766 } 3767 total_counts = total_primal_vertices; 3768 for (i=n_vertices;i<total_counts_cc;i++) { 3769 if (!PetscBTLookup(change_basis,i)) { 3770 PetscInt *cols; 3771 3772 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3773 cols = constraints_idxs+constraints_idxs_ptr[i]; 3774 for (k=0;k<constraints_n[i];k++) { 3775 PetscInt row = total_counts+k; 3776 PetscScalar *vals; 3777 3778 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3779 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3780 } 3781 total_counts += constraints_n[i]; 3782 } 3783 } 3784 /* assembling */ 3785 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3786 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 3788 /* 3789 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3790 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3791 */ 3792 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3793 if (pcbddc->use_change_of_basis) { 3794 /* dual and primal dofs on a single cc */ 3795 PetscInt dual_dofs,primal_dofs; 3796 /* working stuff for GEQRF */ 3797 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3798 PetscBLASInt lqr_work; 3799 /* working stuff for UNGQR */ 3800 PetscScalar *gqr_work,lgqr_work_t; 3801 PetscBLASInt lgqr_work; 3802 /* working stuff for TRTRS */ 3803 PetscScalar *trs_rhs; 3804 PetscBLASInt Blas_NRHS; 3805 /* pointers for values insertion into change of basis matrix */ 3806 PetscInt *start_rows,*start_cols; 3807 PetscScalar *start_vals; 3808 /* working stuff for values insertion */ 3809 PetscBT is_primal; 3810 PetscInt *aux_primal_numbering_B; 3811 /* matrix sizes */ 3812 PetscInt global_size,local_size; 3813 /* temporary change of basis */ 3814 Mat localChangeOfBasisMatrix; 3815 /* extra space for debugging */ 3816 PetscScalar *dbg_work; 3817 3818 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3819 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3820 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3821 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3822 /* nonzeros for local mat */ 3823 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3824 for (i=0;i<pcis->n;i++) nnz[i]=1; 3825 for (i=n_vertices;i<total_counts_cc;i++) { 3826 if (PetscBTLookup(change_basis,i)) { 3827 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3828 if (PetscBTLookup(qr_needed_idx,i)) { 3829 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3830 } else { 3831 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3832 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3833 } 3834 } 3835 } 3836 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3837 ierr = PetscFree(nnz);CHKERRQ(ierr); 3838 /* Set initial identity in the matrix */ 3839 for (i=0;i<pcis->n;i++) { 3840 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3841 } 3842 3843 if (pcbddc->dbg_flag) { 3844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3845 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3846 } 3847 3848 3849 /* Now we loop on the constraints which need a change of basis */ 3850 /* 3851 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3852 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3853 3854 Basic blocks of change of basis matrix T computed by 3855 3856 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3857 3858 | 1 0 ... 0 s_1/S | 3859 | 0 1 ... 0 s_2/S | 3860 | ... | 3861 | 0 ... 1 s_{n-1}/S | 3862 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3863 3864 with S = \sum_{i=1}^n s_i^2 3865 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3866 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3867 3868 - QR decomposition of constraints otherwise 3869 */ 3870 if (qr_needed) { 3871 /* space to store Q */ 3872 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3873 /* first we issue queries for optimal work */ 3874 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3875 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3877 lqr_work = -1; 3878 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3879 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3880 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3881 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3882 lgqr_work = -1; 3883 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3885 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3886 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3887 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3888 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3889 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3890 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3891 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3892 /* array to store scaling factors for reflectors */ 3893 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3894 /* array to store rhs and solution of triangular solver */ 3895 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3896 /* allocating workspace for check */ 3897 if (pcbddc->dbg_flag) { 3898 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3899 } 3900 } 3901 /* array to store whether a node is primal or not */ 3902 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3903 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3904 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3905 if (i != total_primal_vertices) { 3906 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3907 } 3908 for (i=0;i<total_primal_vertices;i++) { 3909 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3910 } 3911 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3912 3913 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3914 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3915 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3916 if (PetscBTLookup(change_basis,total_counts)) { 3917 /* get constraint info */ 3918 primal_dofs = constraints_n[total_counts]; 3919 dual_dofs = size_of_constraint-primal_dofs; 3920 3921 if (pcbddc->dbg_flag) { 3922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3923 } 3924 3925 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3926 3927 /* copy quadrature constraints for change of basis check */ 3928 if (pcbddc->dbg_flag) { 3929 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3930 } 3931 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3932 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3933 3934 /* compute QR decomposition of constraints */ 3935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3938 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3939 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3940 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3942 3943 /* explictly compute R^-T */ 3944 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3945 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3946 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3948 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3949 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3950 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3951 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3952 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3953 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3954 3955 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3961 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3962 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3964 3965 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3966 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3967 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3969 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3970 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3974 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3975 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3976 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3977 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3978 3979 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3980 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3981 /* insert cols for primal dofs */ 3982 for (j=0;j<primal_dofs;j++) { 3983 start_vals = &qr_basis[j*size_of_constraint]; 3984 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3985 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3986 } 3987 /* insert cols for dual dofs */ 3988 for (j=0,k=0;j<dual_dofs;k++) { 3989 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3990 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3991 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3992 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3993 j++; 3994 } 3995 } 3996 3997 /* check change of basis */ 3998 if (pcbddc->dbg_flag) { 3999 PetscInt ii,jj; 4000 PetscBool valid_qr=PETSC_TRUE; 4001 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4006 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4007 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4008 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4009 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4010 for (jj=0;jj<size_of_constraint;jj++) { 4011 for (ii=0;ii<primal_dofs;ii++) { 4012 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4013 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4014 } 4015 } 4016 if (!valid_qr) { 4017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4018 for (jj=0;jj<size_of_constraint;jj++) { 4019 for (ii=0;ii<primal_dofs;ii++) { 4020 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4021 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4022 } 4023 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4024 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4025 } 4026 } 4027 } 4028 } else { 4029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4030 } 4031 } 4032 } else { /* simple transformation block */ 4033 PetscInt row,col; 4034 PetscScalar val,norm; 4035 4036 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4037 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4038 for (j=0;j<size_of_constraint;j++) { 4039 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4040 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4041 if (!PetscBTLookup(is_primal,row_B)) { 4042 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4043 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4044 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4045 } else { 4046 for (k=0;k<size_of_constraint;k++) { 4047 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4048 if (row != col) { 4049 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4050 } else { 4051 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4052 } 4053 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4054 } 4055 } 4056 } 4057 if (pcbddc->dbg_flag) { 4058 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4059 } 4060 } 4061 } else { 4062 if (pcbddc->dbg_flag) { 4063 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4064 } 4065 } 4066 } 4067 4068 /* free workspace */ 4069 if (qr_needed) { 4070 if (pcbddc->dbg_flag) { 4071 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4072 } 4073 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4074 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4075 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4076 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4077 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4078 } 4079 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4080 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4081 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4082 4083 /* assembling of global change of variable */ 4084 if (!pcbddc->fake_change) { 4085 Mat tmat; 4086 PetscInt bs; 4087 4088 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4089 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4090 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4091 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4092 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4093 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4094 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4095 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4096 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4097 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4098 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4099 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4100 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4101 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4102 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4103 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4104 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4105 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4106 4107 /* check */ 4108 if (pcbddc->dbg_flag) { 4109 PetscReal error; 4110 Vec x,x_change; 4111 4112 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4113 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4114 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4115 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4116 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4117 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4118 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4119 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4120 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4121 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4122 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4123 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4124 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4126 ierr = VecDestroy(&x);CHKERRQ(ierr); 4127 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4128 } 4129 } 4130 4131 /* adapt sub_schurs computed (if any) */ 4132 if (pcbddc->use_deluxe_scaling) { 4133 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4134 if (sub_schurs->S_Ej_all) { 4135 Mat S_new,tmat; 4136 ISLocalToGlobalMapping NtoSall; 4137 IS is_all_N,is_V,is_V_Sall; 4138 const PetscScalar *array; 4139 const PetscInt *idxs_V,*idxs_all; 4140 PetscInt i,n_V; 4141 4142 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4143 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4144 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4145 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4146 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4147 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4148 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4149 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4150 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4151 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4152 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4153 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4154 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4155 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4156 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4157 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4158 for (i=0;i<n_V;i++) { 4159 PetscScalar val; 4160 PetscInt idx; 4161 4162 idx = idxs_V[i]; 4163 val = array[idxs_all[idxs_V[i]]]; 4164 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4165 } 4166 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4167 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4168 sub_schurs->S_Ej_all = S_new; 4169 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4170 if (sub_schurs->sum_S_Ej_all) { 4171 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4172 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4173 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4174 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4175 sub_schurs->sum_S_Ej_all = S_new; 4176 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4177 } 4178 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4179 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4180 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4181 if (!pcbddc->fake_change) { 4182 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4183 } else { 4184 ierr = ISDestroy(&sub_schurs->change_primal);CHKERRQ(ierr); 4185 sub_schurs->change_primal = is_V_Sall; 4186 ierr = KSPDestroy(&sub_schurs->change);CHKERRQ(ierr); 4187 ierr = KSPCreate(PETSC_COMM_SELF,&sub_schurs->change);CHKERRQ(ierr); 4188 ierr = KSPSetOperators(sub_schurs->change,tmat,tmat);CHKERRQ(ierr); 4189 } 4190 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4191 } 4192 } 4193 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4194 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4195 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4196 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4197 } 4198 4199 /* set up change of basis context */ 4200 if (pcbddc->ChangeOfBasisMatrix) { 4201 PCBDDCChange_ctx change_ctx; 4202 4203 if (!pcbddc->new_global_mat) { 4204 PetscInt global_size,local_size; 4205 4206 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4207 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4208 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4209 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4210 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4211 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4212 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4213 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4214 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4215 } else { 4216 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4217 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4218 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4219 } 4220 if (!pcbddc->user_ChangeOfBasisMatrix) { 4221 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4222 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4223 } else { 4224 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4225 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4226 } 4227 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4228 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4229 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4230 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4231 } else { 4232 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4233 } 4234 4235 /* add pressure dofs to set of primal nodes for numbering purposes */ 4236 for (i=0;i<pcbddc->benign_n;i++) { 4237 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4238 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4239 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4240 pcbddc->local_primal_size_cc++; 4241 pcbddc->local_primal_size++; 4242 } 4243 4244 /* check if a new primal space has been introduced (also take into account benign trick) */ 4245 pcbddc->new_primal_space_local = PETSC_TRUE; 4246 if (olocal_primal_size == pcbddc->local_primal_size) { 4247 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4248 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4249 if (!pcbddc->new_primal_space_local) { 4250 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4251 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4252 } 4253 } 4254 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4255 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4256 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4257 4258 /* flush dbg viewer */ 4259 if (pcbddc->dbg_flag) { 4260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4261 } 4262 4263 /* free workspace */ 4264 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4265 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4266 if (!pcbddc->adaptive_selection) { 4267 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4268 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4269 } else { 4270 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4271 pcbddc->adaptive_constraints_idxs_ptr, 4272 pcbddc->adaptive_constraints_data_ptr, 4273 pcbddc->adaptive_constraints_idxs, 4274 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4275 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4276 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4277 } 4278 PetscFunctionReturn(0); 4279 } 4280 4281 #undef __FUNCT__ 4282 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4283 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4284 { 4285 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4286 PC_IS *pcis = (PC_IS*)pc->data; 4287 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4288 PetscInt ierr,i,vertex_size,N; 4289 PetscViewer viewer=pcbddc->dbg_viewer; 4290 4291 PetscFunctionBegin; 4292 /* Reset previously computed graph */ 4293 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4294 /* Init local Graph struct */ 4295 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4296 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4297 4298 /* Check validity of the csr graph passed in by the user */ 4299 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4300 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4301 } 4302 4303 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4304 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4305 PetscInt *xadj,*adjncy; 4306 PetscInt nvtxs; 4307 PetscBool flg_row=PETSC_FALSE; 4308 4309 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4310 if (flg_row) { 4311 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4312 pcbddc->computed_rowadj = PETSC_TRUE; 4313 } 4314 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4315 } 4316 if (pcbddc->dbg_flag) { 4317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4318 } 4319 4320 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4321 vertex_size = 1; 4322 if (pcbddc->user_provided_isfordofs) { 4323 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4324 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4325 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4326 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4327 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4328 } 4329 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4330 pcbddc->n_ISForDofs = 0; 4331 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4332 } 4333 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4334 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4335 } else { 4336 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4337 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4338 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4339 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4340 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4341 } 4342 } 4343 } 4344 4345 /* Setup of Graph */ 4346 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4347 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4348 } 4349 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4350 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4351 } 4352 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4353 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4354 } 4355 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4356 4357 /* attach info on disconnected subdomains if present */ 4358 if (pcbddc->n_local_subs) { 4359 PetscInt *local_subs; 4360 4361 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4362 for (i=0;i<pcbddc->n_local_subs;i++) { 4363 const PetscInt *idxs; 4364 PetscInt nl,j; 4365 4366 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4367 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4368 for (j=0;j<nl;j++) { 4369 local_subs[idxs[j]] = i; 4370 } 4371 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4372 } 4373 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4374 pcbddc->mat_graph->local_subs = local_subs; 4375 } 4376 4377 /* Graph's connected components analysis */ 4378 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4379 4380 /* print some info to stdout */ 4381 if (pcbddc->dbg_flag) { 4382 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4383 } 4384 4385 /* mark topography has done */ 4386 pcbddc->recompute_topography = PETSC_FALSE; 4387 PetscFunctionReturn(0); 4388 } 4389 4390 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "PCBDDCSubsetNumbering" 4393 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4394 { 4395 PetscSF sf; 4396 PetscLayout map; 4397 const PetscInt *idxs; 4398 PetscInt *leaf_data,*root_data,*gidxs; 4399 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4400 PetscInt n_n,nlocals,start,first_index; 4401 PetscMPIInt commsize; 4402 PetscBool first_found; 4403 PetscErrorCode ierr; 4404 4405 PetscFunctionBegin; 4406 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4407 if (subset_mult) { 4408 PetscCheckSameComm(subset,1,subset_mult,2); 4409 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4410 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4411 } 4412 /* create workspace layout for computing global indices of subset */ 4413 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4414 lbounds[0] = lbounds[1] = 0; 4415 for (i=0;i<n;i++) { 4416 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4417 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4418 } 4419 lbounds[0] = -lbounds[0]; 4420 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4421 gbounds[0] = -gbounds[0]; 4422 N = gbounds[1] - gbounds[0] + 1; 4423 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4424 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4425 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4426 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4427 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4428 4429 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4430 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4431 if (subset_mult) { 4432 const PetscInt* idxs_mult; 4433 4434 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4435 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4436 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4437 } else { 4438 for (i=0;i<n;i++) leaf_data[i] = 1; 4439 } 4440 /* local size of new subset */ 4441 n_n = 0; 4442 for (i=0;i<n;i++) n_n += leaf_data[i]; 4443 4444 /* global indexes in layout */ 4445 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4446 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4447 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4448 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4449 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4450 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4451 4452 /* reduce from leaves to roots */ 4453 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4454 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4455 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4456 4457 /* count indexes in local part of layout */ 4458 nlocals = 0; 4459 first_index = -1; 4460 first_found = PETSC_FALSE; 4461 for (i=0;i<Nl;i++) { 4462 if (!first_found && root_data[i]) { 4463 first_found = PETSC_TRUE; 4464 first_index = i; 4465 } 4466 nlocals += root_data[i]; 4467 } 4468 4469 /* cumulative of number of indexes and size of subset without holes */ 4470 #if defined(PETSC_HAVE_MPI_EXSCAN) 4471 start = 0; 4472 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4473 #else 4474 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4475 start = start-nlocals; 4476 #endif 4477 4478 if (N_n) { /* compute total size of new subset if requested */ 4479 *N_n = start + nlocals; 4480 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4481 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4482 } 4483 4484 /* adapt root data with cumulative */ 4485 if (first_found) { 4486 PetscInt old_index; 4487 4488 root_data[first_index] += start; 4489 old_index = first_index; 4490 for (i=first_index+1;i<Nl;i++) { 4491 if (root_data[i]) { 4492 root_data[i] += root_data[old_index]; 4493 old_index = i; 4494 } 4495 } 4496 } 4497 4498 /* from roots to leaves */ 4499 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4500 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4501 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4502 4503 /* create new IS with global indexes without holes */ 4504 if (subset_mult) { 4505 const PetscInt* idxs_mult; 4506 PetscInt cum; 4507 4508 cum = 0; 4509 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4510 for (i=0;i<n;i++) { 4511 PetscInt j; 4512 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4513 } 4514 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4515 } else { 4516 for (i=0;i<n;i++) { 4517 gidxs[i] = leaf_data[i]-1; 4518 } 4519 } 4520 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4521 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 4525 #undef __FUNCT__ 4526 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4527 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4528 { 4529 PetscInt i,j; 4530 PetscScalar *alphas; 4531 PetscErrorCode ierr; 4532 4533 PetscFunctionBegin; 4534 /* this implements stabilized Gram-Schmidt */ 4535 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4536 for (i=0;i<n;i++) { 4537 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4538 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4539 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4540 } 4541 ierr = PetscFree(alphas);CHKERRQ(ierr); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 #undef __FUNCT__ 4546 #define __FUNCT__ "MatISGetSubassemblingPattern" 4547 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4548 { 4549 IS ranks_send_to; 4550 PetscInt n_neighs,*neighs,*n_shared,**shared; 4551 PetscMPIInt size,rank,color; 4552 PetscInt *xadj,*adjncy; 4553 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4554 PetscInt i,local_size,threshold=0; 4555 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4556 PetscSubcomm subcomm; 4557 PetscErrorCode ierr; 4558 4559 PetscFunctionBegin; 4560 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4561 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4562 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4563 4564 /* Get info on mapping */ 4565 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4566 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4567 4568 /* build local CSR graph of subdomains' connectivity */ 4569 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4570 xadj[0] = 0; 4571 xadj[1] = PetscMax(n_neighs-1,0); 4572 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4573 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4574 4575 if (threshold) { 4576 PetscInt xadj_count = 0; 4577 for (i=1;i<n_neighs;i++) { 4578 if (n_shared[i] > threshold) { 4579 adjncy[xadj_count] = neighs[i]; 4580 adjncy_wgt[xadj_count] = n_shared[i]; 4581 xadj_count++; 4582 } 4583 } 4584 xadj[1] = xadj_count; 4585 } else { 4586 if (xadj[1]) { 4587 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4588 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4589 } 4590 } 4591 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4592 if (use_square) { 4593 for (i=0;i<xadj[1];i++) { 4594 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4595 } 4596 } 4597 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4598 4599 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4600 4601 /* 4602 Restrict work on active processes only. 4603 */ 4604 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4605 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4606 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4607 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4608 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4609 if (color) { 4610 ierr = PetscFree(xadj);CHKERRQ(ierr); 4611 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4612 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4613 } else { 4614 Mat subdomain_adj; 4615 IS new_ranks,new_ranks_contig; 4616 MatPartitioning partitioner; 4617 PetscInt prank,rstart=0,rend=0; 4618 PetscInt *is_indices,*oldranks; 4619 PetscBool aggregate; 4620 4621 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4622 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4623 prank = rank; 4624 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4625 /* 4626 for (i=0;i<size;i++) { 4627 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4628 } 4629 */ 4630 for (i=0;i<xadj[1];i++) { 4631 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4632 } 4633 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4634 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4635 if (aggregate) { 4636 PetscInt lrows,row,ncols,*cols; 4637 PetscMPIInt nrank; 4638 PetscScalar *vals; 4639 4640 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4641 lrows = 0; 4642 if (nrank<redprocs) { 4643 lrows = size/redprocs; 4644 if (nrank<size%redprocs) lrows++; 4645 } 4646 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4647 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4648 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4649 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4650 row = nrank; 4651 ncols = xadj[1]-xadj[0]; 4652 cols = adjncy; 4653 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4654 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4655 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4656 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 ierr = PetscFree(xadj);CHKERRQ(ierr); 4659 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4660 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4661 ierr = PetscFree(vals);CHKERRQ(ierr); 4662 } else { 4663 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4664 } 4665 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4666 4667 /* Partition */ 4668 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4669 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4670 if (use_vwgt) { 4671 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4672 v_wgt[0] = local_size; 4673 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4674 } 4675 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4676 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4677 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4678 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4679 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4680 4681 /* renumber new_ranks to avoid "holes" in new set of processors */ 4682 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4683 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4684 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4685 if (!redprocs) { 4686 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4687 } else { 4688 PetscInt idxs[1]; 4689 PetscMPIInt tag; 4690 MPI_Request *reqs; 4691 4692 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4693 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4694 for (i=rstart;i<rend;i++) { 4695 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4696 } 4697 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4698 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4699 ierr = PetscFree(reqs);CHKERRQ(ierr); 4700 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4701 } 4702 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4703 /* clean up */ 4704 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4705 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4706 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4707 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4708 } 4709 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4710 4711 /* assemble parallel IS for sends */ 4712 i = 1; 4713 if (color) i=0; 4714 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4715 /* get back IS */ 4716 *is_sends = ranks_send_to; 4717 PetscFunctionReturn(0); 4718 } 4719 4720 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4721 4722 #undef __FUNCT__ 4723 #define __FUNCT__ "MatISSubassemble" 4724 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4725 { 4726 Mat local_mat; 4727 IS is_sends_internal; 4728 PetscInt rows,cols,new_local_rows; 4729 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4730 PetscBool ismatis,isdense,newisdense,destroy_mat; 4731 ISLocalToGlobalMapping l2gmap; 4732 PetscInt* l2gmap_indices; 4733 const PetscInt* is_indices; 4734 MatType new_local_type; 4735 /* buffers */ 4736 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4737 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4738 PetscInt *recv_buffer_idxs_local; 4739 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4740 /* MPI */ 4741 MPI_Comm comm,comm_n; 4742 PetscSubcomm subcomm; 4743 PetscMPIInt n_sends,n_recvs,commsize; 4744 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4745 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4746 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4747 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4748 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4749 PetscErrorCode ierr; 4750 4751 PetscFunctionBegin; 4752 /* TODO: add missing checks */ 4753 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4754 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4755 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4756 PetscValidLogicalCollectiveInt(mat,nis,7); 4757 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4758 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4759 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4760 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4761 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4762 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4763 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4764 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4765 PetscInt mrows,mcols,mnrows,mncols; 4766 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4767 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4768 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4769 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4770 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4771 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4772 } 4773 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4774 PetscValidLogicalCollectiveInt(mat,bs,0); 4775 /* prepare IS for sending if not provided */ 4776 if (!is_sends) { 4777 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4778 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4779 } else { 4780 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4781 is_sends_internal = is_sends; 4782 } 4783 4784 /* get comm */ 4785 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4786 4787 /* compute number of sends */ 4788 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4789 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4790 4791 /* compute number of receives */ 4792 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4793 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4794 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4795 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4796 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4797 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4798 ierr = PetscFree(iflags);CHKERRQ(ierr); 4799 4800 /* restrict comm if requested */ 4801 subcomm = 0; 4802 destroy_mat = PETSC_FALSE; 4803 if (restrict_comm) { 4804 PetscMPIInt color,subcommsize; 4805 4806 color = 0; 4807 if (restrict_full) { 4808 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4809 } else { 4810 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4811 } 4812 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4813 subcommsize = commsize - subcommsize; 4814 /* check if reuse has been requested */ 4815 if (reuse == MAT_REUSE_MATRIX) { 4816 if (*mat_n) { 4817 PetscMPIInt subcommsize2; 4818 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4819 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4820 comm_n = PetscObjectComm((PetscObject)*mat_n); 4821 } else { 4822 comm_n = PETSC_COMM_SELF; 4823 } 4824 } else { /* MAT_INITIAL_MATRIX */ 4825 PetscMPIInt rank; 4826 4827 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4828 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4829 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4830 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4831 comm_n = PetscSubcommChild(subcomm); 4832 } 4833 /* flag to destroy *mat_n if not significative */ 4834 if (color) destroy_mat = PETSC_TRUE; 4835 } else { 4836 comm_n = comm; 4837 } 4838 4839 /* prepare send/receive buffers */ 4840 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4841 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4842 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4843 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4844 if (nis) { 4845 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4846 } 4847 4848 /* Get data from local matrices */ 4849 if (!isdense) { 4850 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4851 /* TODO: See below some guidelines on how to prepare the local buffers */ 4852 /* 4853 send_buffer_vals should contain the raw values of the local matrix 4854 send_buffer_idxs should contain: 4855 - MatType_PRIVATE type 4856 - PetscInt size_of_l2gmap 4857 - PetscInt global_row_indices[size_of_l2gmap] 4858 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4859 */ 4860 } else { 4861 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4862 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4863 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4864 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4865 send_buffer_idxs[1] = i; 4866 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4867 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4868 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4869 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4870 for (i=0;i<n_sends;i++) { 4871 ilengths_vals[is_indices[i]] = len*len; 4872 ilengths_idxs[is_indices[i]] = len+2; 4873 } 4874 } 4875 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4876 /* additional is (if any) */ 4877 if (nis) { 4878 PetscMPIInt psum; 4879 PetscInt j; 4880 for (j=0,psum=0;j<nis;j++) { 4881 PetscInt plen; 4882 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4883 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4884 psum += len+1; /* indices + lenght */ 4885 } 4886 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4887 for (j=0,psum=0;j<nis;j++) { 4888 PetscInt plen; 4889 const PetscInt *is_array_idxs; 4890 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4891 send_buffer_idxs_is[psum] = plen; 4892 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4893 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4894 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4895 psum += plen+1; /* indices + lenght */ 4896 } 4897 for (i=0;i<n_sends;i++) { 4898 ilengths_idxs_is[is_indices[i]] = psum; 4899 } 4900 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4901 } 4902 4903 buf_size_idxs = 0; 4904 buf_size_vals = 0; 4905 buf_size_idxs_is = 0; 4906 for (i=0;i<n_recvs;i++) { 4907 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4908 buf_size_vals += (PetscInt)olengths_vals[i]; 4909 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4910 } 4911 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4912 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4913 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4914 4915 /* get new tags for clean communications */ 4916 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4917 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4918 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4919 4920 /* allocate for requests */ 4921 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4922 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4923 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4924 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4925 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4926 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4927 4928 /* communications */ 4929 ptr_idxs = recv_buffer_idxs; 4930 ptr_vals = recv_buffer_vals; 4931 ptr_idxs_is = recv_buffer_idxs_is; 4932 for (i=0;i<n_recvs;i++) { 4933 source_dest = onodes[i]; 4934 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4935 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4936 ptr_idxs += olengths_idxs[i]; 4937 ptr_vals += olengths_vals[i]; 4938 if (nis) { 4939 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4940 ptr_idxs_is += olengths_idxs_is[i]; 4941 } 4942 } 4943 for (i=0;i<n_sends;i++) { 4944 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4945 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4946 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4947 if (nis) { 4948 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4949 } 4950 } 4951 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4952 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4953 4954 /* assemble new l2g map */ 4955 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4956 ptr_idxs = recv_buffer_idxs; 4957 new_local_rows = 0; 4958 for (i=0;i<n_recvs;i++) { 4959 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4960 ptr_idxs += olengths_idxs[i]; 4961 } 4962 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4963 ptr_idxs = recv_buffer_idxs; 4964 new_local_rows = 0; 4965 for (i=0;i<n_recvs;i++) { 4966 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4967 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4968 ptr_idxs += olengths_idxs[i]; 4969 } 4970 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4971 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4972 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4973 4974 /* infer new local matrix type from received local matrices type */ 4975 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4976 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4977 if (n_recvs) { 4978 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4979 ptr_idxs = recv_buffer_idxs; 4980 for (i=0;i<n_recvs;i++) { 4981 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4982 new_local_type_private = MATAIJ_PRIVATE; 4983 break; 4984 } 4985 ptr_idxs += olengths_idxs[i]; 4986 } 4987 switch (new_local_type_private) { 4988 case MATDENSE_PRIVATE: 4989 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4990 new_local_type = MATSEQAIJ; 4991 bs = 1; 4992 } else { /* if I receive only 1 dense matrix */ 4993 new_local_type = MATSEQDENSE; 4994 bs = 1; 4995 } 4996 break; 4997 case MATAIJ_PRIVATE: 4998 new_local_type = MATSEQAIJ; 4999 bs = 1; 5000 break; 5001 case MATBAIJ_PRIVATE: 5002 new_local_type = MATSEQBAIJ; 5003 break; 5004 case MATSBAIJ_PRIVATE: 5005 new_local_type = MATSEQSBAIJ; 5006 break; 5007 default: 5008 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5009 break; 5010 } 5011 } else { /* by default, new_local_type is seqdense */ 5012 new_local_type = MATSEQDENSE; 5013 bs = 1; 5014 } 5015 5016 /* create MATIS object if needed */ 5017 if (reuse == MAT_INITIAL_MATRIX) { 5018 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5019 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5020 } else { 5021 /* it also destroys the local matrices */ 5022 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5023 } 5024 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5025 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5026 5027 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5028 5029 /* Global to local map of received indices */ 5030 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5031 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5032 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5033 5034 /* restore attributes -> type of incoming data and its size */ 5035 buf_size_idxs = 0; 5036 for (i=0;i<n_recvs;i++) { 5037 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5038 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5039 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5040 } 5041 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5042 5043 /* set preallocation */ 5044 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5045 if (!newisdense) { 5046 PetscInt *new_local_nnz=0; 5047 5048 ptr_vals = recv_buffer_vals; 5049 ptr_idxs = recv_buffer_idxs_local; 5050 if (n_recvs) { 5051 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5052 } 5053 for (i=0;i<n_recvs;i++) { 5054 PetscInt j; 5055 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5056 for (j=0;j<*(ptr_idxs+1);j++) { 5057 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5058 } 5059 } else { 5060 /* TODO */ 5061 } 5062 ptr_idxs += olengths_idxs[i]; 5063 } 5064 if (new_local_nnz) { 5065 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5066 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5067 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5068 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5069 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5070 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5071 } else { 5072 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5073 } 5074 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5075 } else { 5076 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5077 } 5078 5079 /* set values */ 5080 ptr_vals = recv_buffer_vals; 5081 ptr_idxs = recv_buffer_idxs_local; 5082 for (i=0;i<n_recvs;i++) { 5083 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5084 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5085 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5086 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5087 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5088 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5089 } else { 5090 /* TODO */ 5091 } 5092 ptr_idxs += olengths_idxs[i]; 5093 ptr_vals += olengths_vals[i]; 5094 } 5095 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5096 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5097 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5098 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5099 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5100 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5101 5102 #if 0 5103 if (!restrict_comm) { /* check */ 5104 Vec lvec,rvec; 5105 PetscReal infty_error; 5106 5107 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5108 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5109 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5110 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5111 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5112 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5113 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5114 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5115 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5116 } 5117 #endif 5118 5119 /* assemble new additional is (if any) */ 5120 if (nis) { 5121 PetscInt **temp_idxs,*count_is,j,psum; 5122 5123 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5124 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5125 ptr_idxs = recv_buffer_idxs_is; 5126 psum = 0; 5127 for (i=0;i<n_recvs;i++) { 5128 for (j=0;j<nis;j++) { 5129 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5130 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5131 psum += plen; 5132 ptr_idxs += plen+1; /* shift pointer to received data */ 5133 } 5134 } 5135 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5136 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5137 for (i=1;i<nis;i++) { 5138 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5139 } 5140 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5141 ptr_idxs = recv_buffer_idxs_is; 5142 for (i=0;i<n_recvs;i++) { 5143 for (j=0;j<nis;j++) { 5144 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5145 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5146 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5147 ptr_idxs += plen+1; /* shift pointer to received data */ 5148 } 5149 } 5150 for (i=0;i<nis;i++) { 5151 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5152 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5153 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5154 } 5155 ierr = PetscFree(count_is);CHKERRQ(ierr); 5156 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5157 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5158 } 5159 /* free workspace */ 5160 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5161 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5162 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5163 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5164 if (isdense) { 5165 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5166 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5167 } else { 5168 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5169 } 5170 if (nis) { 5171 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5172 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5173 } 5174 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5175 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5176 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5177 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5178 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5179 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5180 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5181 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5182 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5183 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5184 ierr = PetscFree(onodes);CHKERRQ(ierr); 5185 if (nis) { 5186 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5187 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5188 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5189 } 5190 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5191 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5192 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5193 for (i=0;i<nis;i++) { 5194 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5195 } 5196 *mat_n = NULL; 5197 } 5198 PetscFunctionReturn(0); 5199 } 5200 5201 /* temporary hack into ksp private data structure */ 5202 #include <petsc/private/kspimpl.h> 5203 5204 #undef __FUNCT__ 5205 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5206 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5207 { 5208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5209 PC_IS *pcis = (PC_IS*)pc->data; 5210 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5211 MatNullSpace CoarseNullSpace=NULL; 5212 ISLocalToGlobalMapping coarse_islg; 5213 IS coarse_is,*isarray; 5214 PetscInt i,im_active=-1,active_procs=-1; 5215 PetscInt nis,nisdofs,nisneu,nisvert; 5216 PC pc_temp; 5217 PCType coarse_pc_type; 5218 KSPType coarse_ksp_type; 5219 PetscBool multilevel_requested,multilevel_allowed; 5220 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5221 Mat t_coarse_mat_is; 5222 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5223 PetscMPIInt all_procs; 5224 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5225 PetscBool compute_vecs = PETSC_FALSE; 5226 PetscScalar *array; 5227 PetscErrorCode ierr; 5228 5229 PetscFunctionBegin; 5230 /* Assign global numbering to coarse dofs */ 5231 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5232 PetscInt ocoarse_size; 5233 compute_vecs = PETSC_TRUE; 5234 ocoarse_size = pcbddc->coarse_size; 5235 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5236 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5237 /* see if we can avoid some work */ 5238 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5239 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5240 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5241 PC pc; 5242 PetscBool isbddc; 5243 5244 /* temporary workaround since PCBDDC does not have a reset method so far */ 5245 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5246 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5247 if (isbddc) { 5248 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5249 } else { 5250 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5251 } 5252 coarse_reuse = PETSC_FALSE; 5253 } else { /* we can safely reuse already computed coarse matrix */ 5254 coarse_reuse = PETSC_TRUE; 5255 } 5256 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5257 coarse_reuse = PETSC_FALSE; 5258 } 5259 /* reset any subassembling information */ 5260 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5261 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5262 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5263 coarse_reuse = PETSC_TRUE; 5264 } 5265 5266 /* count "active" (i.e. with positive local size) and "void" processes */ 5267 im_active = !!(pcis->n); 5268 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5269 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5270 void_procs = all_procs-active_procs; 5271 csin_type_simple = PETSC_TRUE; 5272 redist = PETSC_FALSE; 5273 if (pcbddc->current_level && void_procs) { 5274 csin_ml = PETSC_TRUE; 5275 ncoarse_ml = void_procs; 5276 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5277 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5278 csin_ds = PETSC_TRUE; 5279 ncoarse_ds = pcbddc->redistribute_coarse; 5280 redist = PETSC_TRUE; 5281 } else { 5282 csin_ds = PETSC_TRUE; 5283 ncoarse_ds = active_procs; 5284 redist = PETSC_TRUE; 5285 } 5286 } else { 5287 csin_ml = PETSC_FALSE; 5288 ncoarse_ml = all_procs; 5289 if (void_procs) { 5290 csin_ds = PETSC_TRUE; 5291 ncoarse_ds = void_procs; 5292 csin_type_simple = PETSC_FALSE; 5293 } else { 5294 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5295 csin_ds = PETSC_TRUE; 5296 ncoarse_ds = pcbddc->redistribute_coarse; 5297 redist = PETSC_TRUE; 5298 } else { 5299 csin_ds = PETSC_FALSE; 5300 ncoarse_ds = all_procs; 5301 } 5302 } 5303 } 5304 5305 /* 5306 test if we can go multilevel: three conditions must be satisfied: 5307 - we have not exceeded the number of levels requested 5308 - we can actually subassemble the active processes 5309 - we can find a suitable number of MPI processes where we can place the subassembled problem 5310 */ 5311 multilevel_allowed = PETSC_FALSE; 5312 multilevel_requested = PETSC_FALSE; 5313 if (pcbddc->current_level < pcbddc->max_levels) { 5314 multilevel_requested = PETSC_TRUE; 5315 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5316 multilevel_allowed = PETSC_FALSE; 5317 } else { 5318 multilevel_allowed = PETSC_TRUE; 5319 } 5320 } 5321 /* determine number of process partecipating to coarse solver */ 5322 if (multilevel_allowed) { 5323 ncoarse = ncoarse_ml; 5324 csin = csin_ml; 5325 redist = PETSC_FALSE; 5326 } else { 5327 ncoarse = ncoarse_ds; 5328 csin = csin_ds; 5329 } 5330 5331 /* creates temporary l2gmap and IS for coarse indexes */ 5332 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5333 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5334 5335 /* creates temporary MATIS object for coarse matrix */ 5336 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5337 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5338 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5339 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5340 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5341 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5342 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5343 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5344 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5345 5346 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5347 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5348 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5349 const PetscInt *idxs; 5350 ISLocalToGlobalMapping tmap; 5351 5352 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5353 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5354 /* allocate space for temporary storage */ 5355 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5356 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5357 /* allocate for IS array */ 5358 nisdofs = pcbddc->n_ISForDofsLocal; 5359 nisneu = !!pcbddc->NeumannBoundariesLocal; 5360 nisvert = 0; 5361 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5362 nisvert = 1; 5363 } 5364 nis = nisdofs + nisneu + nisvert; 5365 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5366 /* dofs splitting */ 5367 for (i=0;i<nisdofs;i++) { 5368 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5369 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5370 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5371 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5372 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5373 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5374 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5375 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5376 } 5377 /* neumann boundaries */ 5378 if (pcbddc->NeumannBoundariesLocal) { 5379 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5380 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5381 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5382 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5383 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5384 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5385 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5386 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5387 } 5388 /* primal vertices (benign) */ 5389 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5390 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5391 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5392 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5393 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5394 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5396 } 5397 /* free memory */ 5398 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5399 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5400 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5401 } else { 5402 nis = 0; 5403 nisdofs = 0; 5404 nisneu = 0; 5405 nisvert = 0; 5406 isarray = NULL; 5407 } 5408 /* destroy no longer needed map */ 5409 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5410 5411 /* restrict on coarse candidates (if needed) */ 5412 coarse_mat_is = NULL; 5413 if (csin) { 5414 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5415 if (redist) { 5416 PetscMPIInt rank; 5417 PetscInt spc,n_spc_p1,dest[1],destsize; 5418 5419 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5420 spc = active_procs/ncoarse; 5421 n_spc_p1 = active_procs%ncoarse; 5422 if (im_active) { 5423 destsize = 1; 5424 if (rank > n_spc_p1*(spc+1)-1) { 5425 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5426 } else { 5427 dest[0] = rank/(spc+1); 5428 } 5429 } else { 5430 destsize = 0; 5431 } 5432 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5433 } else if (csin_type_simple) { 5434 PetscMPIInt rank; 5435 PetscInt issize,isidx; 5436 5437 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5438 if (im_active) { 5439 issize = 1; 5440 isidx = (PetscInt)rank; 5441 } else { 5442 issize = 0; 5443 isidx = -1; 5444 } 5445 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5446 } else { /* get a suitable subassembling pattern from MATIS code */ 5447 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5448 } 5449 5450 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5451 if (!redist || ncoarse <= void_procs) { 5452 PetscInt ncoarse_cand,tissize,*nisindices; 5453 PetscInt *coarse_candidates; 5454 const PetscInt* tisindices; 5455 5456 /* get coarse candidates' ranks in pc communicator */ 5457 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5458 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5459 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5460 if (!coarse_candidates[i]) { 5461 coarse_candidates[ncoarse_cand++]=i; 5462 } 5463 } 5464 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5465 5466 5467 if (pcbddc->dbg_flag) { 5468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5470 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5471 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5472 for (i=0;i<ncoarse_cand;i++) { 5473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5474 } 5475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5476 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5477 } 5478 /* shift the pattern on coarse candidates */ 5479 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5480 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5481 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5482 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5483 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5484 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5485 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5486 } 5487 if (pcbddc->dbg_flag) { 5488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5490 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5492 } 5493 } 5494 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5495 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5496 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5497 } else { /* this is the last level, so use just receiving processes in subcomm */ 5498 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5499 } 5500 } else { 5501 if (pcbddc->dbg_flag) { 5502 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5504 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5505 } 5506 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5507 coarse_mat_is = t_coarse_mat_is; 5508 } 5509 5510 /* create local to global scatters for coarse problem */ 5511 if (compute_vecs) { 5512 PetscInt lrows; 5513 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5514 if (coarse_mat_is) { 5515 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5516 } else { 5517 lrows = 0; 5518 } 5519 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5520 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5521 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5522 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5523 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5524 } 5525 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5526 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5527 5528 /* set defaults for coarse KSP and PC */ 5529 if (multilevel_allowed) { 5530 coarse_ksp_type = KSPRICHARDSON; 5531 coarse_pc_type = PCBDDC; 5532 } else { 5533 coarse_ksp_type = KSPPREONLY; 5534 coarse_pc_type = PCREDUNDANT; 5535 } 5536 5537 /* print some info if requested */ 5538 if (pcbddc->dbg_flag) { 5539 if (!multilevel_allowed) { 5540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5541 if (multilevel_requested) { 5542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5543 } else if (pcbddc->max_levels) { 5544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5545 } 5546 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5547 } 5548 } 5549 5550 /* create the coarse KSP object only once with defaults */ 5551 if (coarse_mat_is) { 5552 MatReuse coarse_mat_reuse; 5553 PetscViewer dbg_viewer = NULL; 5554 if (pcbddc->dbg_flag) { 5555 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5556 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5557 } 5558 if (!pcbddc->coarse_ksp) { 5559 char prefix[256],str_level[16]; 5560 size_t len; 5561 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5562 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5563 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5564 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5565 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5566 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5567 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5568 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5569 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5570 /* prefix */ 5571 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5572 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5573 if (!pcbddc->current_level) { 5574 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5575 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5576 } else { 5577 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5578 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5579 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5580 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5581 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5582 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5583 } 5584 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5585 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5586 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5587 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5588 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5589 /* allow user customization */ 5590 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5591 } 5592 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5593 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5594 if (nisdofs) { 5595 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5596 for (i=0;i<nisdofs;i++) { 5597 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5598 } 5599 } 5600 if (nisneu) { 5601 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5602 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5603 } 5604 if (nisvert) { 5605 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5606 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5607 } 5608 5609 /* get some info after set from options */ 5610 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5611 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5612 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5613 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5614 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5615 isbddc = PETSC_FALSE; 5616 } 5617 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5618 if (isredundant) { 5619 KSP inner_ksp; 5620 PC inner_pc; 5621 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5622 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5623 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5624 } 5625 5626 /* assemble coarse matrix */ 5627 if (coarse_reuse) { 5628 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5629 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5630 coarse_mat_reuse = MAT_REUSE_MATRIX; 5631 } else { 5632 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5633 } 5634 if (isbddc || isnn) { 5635 if (isbddc) { /* currently there's no API for this */ 5636 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5637 pcbddc->detect_disconnected = PETSC_TRUE; 5638 } 5639 if (pcbddc->coarsening_ratio > 1) { 5640 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5641 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5642 if (pcbddc->dbg_flag) { 5643 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5644 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5645 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5646 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5647 } 5648 } 5649 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5650 } else { 5651 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5652 coarse_mat = coarse_mat_is; 5653 } 5654 } else { 5655 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5656 } 5657 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5658 5659 /* propagate symmetry info of coarse matrix */ 5660 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5661 if (pc->pmat->symmetric_set) { 5662 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5663 } 5664 if (pc->pmat->hermitian_set) { 5665 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5666 } 5667 if (pc->pmat->spd_set) { 5668 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5669 } 5670 /* set operators */ 5671 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5672 if (pcbddc->dbg_flag) { 5673 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5674 } 5675 } else { /* processes non partecipating to coarse solver (if any) */ 5676 coarse_mat = 0; 5677 } 5678 ierr = PetscFree(isarray);CHKERRQ(ierr); 5679 #if 0 5680 { 5681 PetscViewer viewer; 5682 char filename[256]; 5683 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5684 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5685 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5686 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5687 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5688 } 5689 #endif 5690 5691 /* Compute coarse null space (special handling by BDDC only) */ 5692 #if 0 5693 if (pcbddc->NullSpace) { 5694 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5695 } 5696 #endif 5697 /* hack */ 5698 if (pcbddc->coarse_ksp) { 5699 Vec crhs,csol; 5700 5701 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5702 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5703 if (!csol) { 5704 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5705 } 5706 if (!crhs) { 5707 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5708 } 5709 } 5710 5711 /* compute null space for coarse solver if the benign trick has been requested */ 5712 if (pcbddc->benign_null) { 5713 5714 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5715 for (i=0;i<pcbddc->benign_n;i++) { 5716 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5717 } 5718 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5719 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5720 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5721 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5722 if (coarse_mat) { 5723 Vec nullv; 5724 PetscScalar *array,*array2; 5725 PetscInt nl; 5726 5727 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5728 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5729 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5730 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5731 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5732 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5733 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5734 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5735 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5736 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5737 } 5738 } 5739 5740 if (pcbddc->coarse_ksp) { 5741 PetscBool ispreonly; 5742 5743 if (CoarseNullSpace) { 5744 PetscBool isnull; 5745 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5746 if (0) { 5747 if (isbddc && !pcbddc->benign_saddle_point) { 5748 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5749 } else { 5750 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5751 } 5752 } else { 5753 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5754 } 5755 } 5756 /* setup coarse ksp */ 5757 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5758 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5759 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5760 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5761 KSP check_ksp; 5762 KSPType check_ksp_type; 5763 PC check_pc; 5764 Vec check_vec,coarse_vec; 5765 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5766 PetscInt its; 5767 PetscBool compute_eigs; 5768 PetscReal *eigs_r,*eigs_c; 5769 PetscInt neigs; 5770 const char *prefix; 5771 5772 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5773 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5774 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5775 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5776 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5777 if (ispreonly) { 5778 check_ksp_type = KSPPREONLY; 5779 compute_eigs = PETSC_FALSE; 5780 } else { 5781 check_ksp_type = KSPGMRES; 5782 compute_eigs = PETSC_TRUE; 5783 } 5784 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5785 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5786 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5787 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5788 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5789 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5790 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5791 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5792 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5793 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5794 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5795 /* create random vec */ 5796 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5797 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5798 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5799 if (CoarseNullSpace) { 5800 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5801 } 5802 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5803 /* solve coarse problem */ 5804 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5805 if (CoarseNullSpace) { 5806 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5807 } 5808 /* set eigenvalue estimation if preonly has not been requested */ 5809 if (compute_eigs) { 5810 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5811 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5812 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5813 lambda_max = eigs_r[neigs-1]; 5814 lambda_min = eigs_r[0]; 5815 if (pcbddc->use_coarse_estimates) { 5816 if (lambda_max>lambda_min) { 5817 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5818 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5819 } 5820 } 5821 } 5822 5823 /* check coarse problem residual error */ 5824 if (pcbddc->dbg_flag) { 5825 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5826 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5827 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5828 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5829 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5830 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5831 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5833 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5837 if (CoarseNullSpace) { 5838 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5839 } 5840 if (compute_eigs) { 5841 PetscReal lambda_max_s,lambda_min_s; 5842 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5843 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5844 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5846 for (i=0;i<neigs;i++) { 5847 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5848 } 5849 } 5850 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5851 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5852 } 5853 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5854 if (compute_eigs) { 5855 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5856 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5857 } 5858 } 5859 } 5860 /* print additional info */ 5861 if (pcbddc->dbg_flag) { 5862 /* waits until all processes reaches this point */ 5863 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5866 } 5867 5868 /* free memory */ 5869 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5870 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5871 PetscFunctionReturn(0); 5872 } 5873 5874 #undef __FUNCT__ 5875 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5876 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5877 { 5878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5879 PC_IS* pcis = (PC_IS*)pc->data; 5880 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5881 IS subset,subset_mult,subset_n; 5882 PetscInt local_size,coarse_size=0; 5883 PetscInt *local_primal_indices=NULL; 5884 const PetscInt *t_local_primal_indices; 5885 PetscErrorCode ierr; 5886 5887 PetscFunctionBegin; 5888 /* Compute global number of coarse dofs */ 5889 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5890 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5891 } 5892 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5893 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5894 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5895 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5896 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5897 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5898 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5899 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5900 if (local_size != pcbddc->local_primal_size) { 5901 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5902 } 5903 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5904 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5905 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5906 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5907 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5908 5909 /* check numbering */ 5910 if (pcbddc->dbg_flag) { 5911 PetscScalar coarsesum,*array,*array2; 5912 PetscInt i; 5913 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5914 5915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5918 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5919 /* counter */ 5920 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5921 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5924 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5925 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5926 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5927 for (i=0;i<pcbddc->local_primal_size;i++) { 5928 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5929 } 5930 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5931 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5932 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5933 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5934 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5935 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5936 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5937 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5938 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5939 for (i=0;i<pcis->n;i++) { 5940 if (array[i] != 0.0 && array[i] != array2[i]) { 5941 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5942 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5943 set_error = PETSC_TRUE; 5944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5945 } 5946 } 5947 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5948 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5950 for (i=0;i<pcis->n;i++) { 5951 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5952 } 5953 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5954 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5955 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5956 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5957 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5959 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5960 PetscInt *gidxs; 5961 5962 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5963 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5966 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5967 for (i=0;i<pcbddc->local_primal_size;i++) { 5968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5969 } 5970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5971 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5972 } 5973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5974 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5975 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5976 } 5977 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5978 /* get back data */ 5979 *coarse_size_n = coarse_size; 5980 *local_primal_indices_n = local_primal_indices; 5981 PetscFunctionReturn(0); 5982 } 5983 5984 #undef __FUNCT__ 5985 #define __FUNCT__ "PCBDDCGlobalToLocal" 5986 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5987 { 5988 IS localis_t; 5989 PetscInt i,lsize,*idxs,n; 5990 PetscScalar *vals; 5991 PetscErrorCode ierr; 5992 5993 PetscFunctionBegin; 5994 /* get indices in local ordering exploiting local to global map */ 5995 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5996 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5997 for (i=0;i<lsize;i++) vals[i] = 1.0; 5998 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5999 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6000 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6001 if (idxs) { /* multilevel guard */ 6002 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6003 } 6004 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6005 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6006 ierr = PetscFree(vals);CHKERRQ(ierr); 6007 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6008 /* now compute set in local ordering */ 6009 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6010 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6011 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6012 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6013 for (i=0,lsize=0;i<n;i++) { 6014 if (PetscRealPart(vals[i]) > 0.5) { 6015 lsize++; 6016 } 6017 } 6018 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6019 for (i=0,lsize=0;i<n;i++) { 6020 if (PetscRealPart(vals[i]) > 0.5) { 6021 idxs[lsize++] = i; 6022 } 6023 } 6024 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6025 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6026 *localis = localis_t; 6027 PetscFunctionReturn(0); 6028 } 6029 6030 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6031 #undef __FUNCT__ 6032 #define __FUNCT__ "PCBDDCMatMult_Private" 6033 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6034 { 6035 PCBDDCChange_ctx change_ctx; 6036 PetscErrorCode ierr; 6037 6038 PetscFunctionBegin; 6039 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6040 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6041 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6042 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6043 PetscFunctionReturn(0); 6044 } 6045 6046 #undef __FUNCT__ 6047 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6048 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6049 { 6050 PCBDDCChange_ctx change_ctx; 6051 PetscErrorCode ierr; 6052 6053 PetscFunctionBegin; 6054 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6055 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6056 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6057 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6058 PetscFunctionReturn(0); 6059 } 6060 6061 #undef __FUNCT__ 6062 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6063 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6064 { 6065 PC_IS *pcis=(PC_IS*)pc->data; 6066 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6067 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6068 Mat S_j; 6069 PetscInt *used_xadj,*used_adjncy; 6070 PetscBool free_used_adj; 6071 PetscErrorCode ierr; 6072 6073 PetscFunctionBegin; 6074 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6075 free_used_adj = PETSC_FALSE; 6076 if (pcbddc->sub_schurs_layers == -1) { 6077 used_xadj = NULL; 6078 used_adjncy = NULL; 6079 } else { 6080 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6081 used_xadj = pcbddc->mat_graph->xadj; 6082 used_adjncy = pcbddc->mat_graph->adjncy; 6083 } else if (pcbddc->computed_rowadj) { 6084 used_xadj = pcbddc->mat_graph->xadj; 6085 used_adjncy = pcbddc->mat_graph->adjncy; 6086 } else { 6087 PetscBool flg_row=PETSC_FALSE; 6088 const PetscInt *xadj,*adjncy; 6089 PetscInt nvtxs; 6090 6091 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6092 if (flg_row) { 6093 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6094 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6095 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6096 free_used_adj = PETSC_TRUE; 6097 } else { 6098 pcbddc->sub_schurs_layers = -1; 6099 used_xadj = NULL; 6100 used_adjncy = NULL; 6101 } 6102 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6103 } 6104 } 6105 6106 /* setup sub_schurs data */ 6107 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6108 if (!sub_schurs->schur_explicit) { 6109 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6110 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6111 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6112 } else { 6113 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6114 PetscBool isseqaij; 6115 PetscInt benign_n; 6116 6117 if (!pcbddc->use_vertices && reuse_solvers) { 6118 PetscInt n_vertices; 6119 6120 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6121 reuse_solvers = (PetscBool)!n_vertices; 6122 } 6123 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6124 if (!isseqaij) { 6125 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6126 if (matis->A == pcbddc->local_mat) { 6127 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6128 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6129 } else { 6130 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6131 } 6132 } 6133 if (!pcbddc->benign_change_explicit) { 6134 benign_n = pcbddc->benign_n; 6135 } else { 6136 benign_n = 0; 6137 } 6138 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6139 /* if the velocities are in the original basis, then we need to eliminate the no-net-flux from the S_Ej 6140 we assume that the user has passed in the associated quadrature weights in the near-null space attached to pc->pmat 6141 We need to compute the change of basis according to the quadrature weights, and this could not be done, at the moment, without some hacking */ 6142 if (pcbddc->benign_saddle_point && !pcbddc->use_change_of_basis && !pcbddc->user_ChangeOfBasisMatrix) { 6143 PC_IS *pcisf; 6144 PC_BDDC *pcbddcf; 6145 PC pcf; 6146 6147 if (pcbddc->sub_schurs_rebuild) { 6148 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6149 } 6150 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6151 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6152 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6153 /* hacks */ 6154 pcisf = (PC_IS*)pcf->data; 6155 pcbddcf = (PC_BDDC*)pcf->data; 6156 *pcisf = *pcis; 6157 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6158 pcbddcf->mat_graph = pcbddc->mat_graph; 6159 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6160 pcbddcf->sub_schurs = sub_schurs; 6161 pcbddcf->use_faces = PETSC_TRUE; 6162 pcbddcf->use_change_of_basis = PETSC_TRUE; 6163 pcbddcf->use_change_on_faces = PETSC_TRUE; 6164 pcbddcf->use_deluxe_scaling = PETSC_TRUE; 6165 pcbddcf->fake_change = PETSC_TRUE; 6166 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6167 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6168 pcbddcf->sub_schurs = NULL; 6169 ierr = MatDestroy(&pcbddcf->ConstraintMatrix);CHKERRQ(ierr); 6170 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6171 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6172 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6173 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6174 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6175 pcf->ops->destroy = NULL; 6176 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6177 } 6178 } 6179 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6180 6181 /* free adjacency */ 6182 if (free_used_adj) { 6183 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6184 } 6185 PetscFunctionReturn(0); 6186 } 6187 6188 #undef __FUNCT__ 6189 #define __FUNCT__ "PCBDDCInitSubSchurs" 6190 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6191 { 6192 PC_IS *pcis=(PC_IS*)pc->data; 6193 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6194 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6195 PCBDDCGraph graph; 6196 PetscErrorCode ierr; 6197 6198 PetscFunctionBegin; 6199 /* attach interface graph for determining subsets */ 6200 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6201 IS verticesIS,verticescomm; 6202 PetscInt vsize,*idxs; 6203 6204 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6205 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6206 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6208 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6209 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6210 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6211 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6212 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6213 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6214 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6215 /* 6216 if (pcbddc->dbg_flag) { 6217 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6218 } 6219 */ 6220 } else { 6221 graph = pcbddc->mat_graph; 6222 } 6223 6224 /* sub_schurs init */ 6225 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6226 6227 /* free graph struct */ 6228 if (pcbddc->sub_schurs_rebuild) { 6229 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6230 } 6231 PetscFunctionReturn(0); 6232 } 6233 6234 #undef __FUNCT__ 6235 #define __FUNCT__ "PCBDDCCheckOperator" 6236 PetscErrorCode PCBDDCCheckOperator(PC pc) 6237 { 6238 PC_IS *pcis=(PC_IS*)pc->data; 6239 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6240 PetscErrorCode ierr; 6241 6242 PetscFunctionBegin; 6243 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6244 IS zerodiag = NULL; 6245 Mat S_j,B0_B=NULL; 6246 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6247 PetscScalar *p0_check,*array,*array2; 6248 PetscReal norm; 6249 PetscInt i; 6250 6251 /* B0 and B0_B */ 6252 if (zerodiag) { 6253 IS dummy; 6254 6255 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6256 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6257 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6258 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6259 } 6260 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6261 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6262 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6263 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6264 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6265 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6266 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6267 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6268 /* S_j */ 6269 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6270 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6271 6272 /* mimic vector in \widetilde{W}_\Gamma */ 6273 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6274 /* continuous in primal space */ 6275 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6276 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6277 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6278 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6279 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6280 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6281 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6282 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6283 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6284 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6285 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6286 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6287 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6288 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6289 6290 /* assemble rhs for coarse problem */ 6291 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6292 /* local with Schur */ 6293 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6294 if (zerodiag) { 6295 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6296 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6297 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6298 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6299 } 6300 /* sum on primal nodes the local contributions */ 6301 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6302 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6303 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6304 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6305 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6306 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6307 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6308 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6309 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6310 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6311 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6312 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6313 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6314 /* scale primal nodes (BDDC sums contibutions) */ 6315 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6316 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6317 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6318 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6319 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6320 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6321 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6322 /* global: \widetilde{B0}_B w_\Gamma */ 6323 if (zerodiag) { 6324 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6325 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6326 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6327 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6328 } 6329 /* BDDC */ 6330 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6331 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6332 6333 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6334 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6335 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6336 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6337 for (i=0;i<pcbddc->benign_n;i++) { 6338 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6339 } 6340 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6341 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6342 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6343 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6344 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6345 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6346 } 6347 PetscFunctionReturn(0); 6348 } 6349