1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 if (has_null_pressures) { 650 IS zerodiagc; 651 const PetscInt *idxs,*idxsc; 652 PetscInt i,s,*nnz; 653 654 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 655 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 656 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 657 /* local change of basis for pressures */ 658 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 659 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 660 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 661 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 662 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 663 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 664 for (i=0;i<pcbddc->benign_n;i++) { 665 PetscInt nzs,j; 666 667 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 668 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 669 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 670 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 671 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 672 } 673 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 674 ierr = PetscFree(nnz);CHKERRQ(ierr); 675 /* set identity on velocities */ 676 for (i=0;i<n-nz;i++) { 677 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 678 } 679 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 680 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 681 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 682 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 683 /* set change on pressures */ 684 for (s=0;s<pcbddc->benign_n;s++) { 685 PetscScalar *array; 686 PetscInt nzs; 687 688 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 689 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 690 for (i=0;i<nzs-1;i++) { 691 PetscScalar vals[2]; 692 PetscInt cols[2]; 693 694 cols[0] = idxs[i]; 695 cols[1] = idxs[nzs-1]; 696 vals[0] = 1.; 697 vals[1] = 1.; 698 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 699 } 700 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 701 for (i=0;i<nzs-1;i++) array[i] = -1.; 702 array[nzs-1] = 1.; 703 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 704 /* store local idxs for p0 */ 705 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 706 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 707 ierr = PetscFree(array);CHKERRQ(ierr); 708 } 709 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 /* project if needed */ 712 if (pcbddc->benign_change_explicit) { 713 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 715 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 717 } 718 /* store global idxs for p0 */ 719 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 720 } 721 pcbddc->benign_zerodiag_subs = zerodiag_subs; 722 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 723 724 /* determines if the coarse solver will be singular or not */ 725 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 726 *zerodiaglocal = zerodiag; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 732 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 733 { 734 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 if (!pcbddc->benign_sf) { 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 740 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 741 } 742 if (get) { /* use SF to get values */ 743 PetscScalar *array; 744 745 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 746 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 747 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 748 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 749 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 750 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 752 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 759 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 760 { 761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 /* TODO: add error checking 766 - avoid nested pop (or push) calls. 767 - cannot push before pop. 768 - cannot call this if pcbddc->local_mat is NULL 769 */ 770 if (!pcbddc->benign_n) { 771 PetscFunctionReturn(0); 772 } 773 if (pop) { 774 if (pcbddc->benign_change_explicit) { 775 IS is_p0; 776 MatReuse reuse; 777 778 /* extract B_0 */ 779 reuse = MAT_INITIAL_MATRIX; 780 if (pcbddc->benign_B0) { 781 reuse = MAT_REUSE_MATRIX; 782 } 783 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 784 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 785 /* remove rows and cols from local problem */ 786 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 787 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 788 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 789 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 790 } else { 791 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 792 PetscScalar *vals; 793 PetscInt i,n,*idxs_ins; 794 795 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 796 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 797 if (!pcbddc->benign_B0) { 798 PetscInt *nnz; 799 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 800 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 801 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 802 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 803 for (i=0;i<pcbddc->benign_n;i++) { 804 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 805 nnz[i] = n - nnz[i]; 806 } 807 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 808 ierr = PetscFree(nnz);CHKERRQ(ierr); 809 } 810 811 for (i=0;i<pcbddc->benign_n;i++) { 812 PetscScalar *array; 813 PetscInt *idxs,j,nz,cum; 814 815 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 818 for (j=0;j<nz;j++) vals[j] = 1.; 819 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 821 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 822 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 823 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 824 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 825 cum = 0; 826 for (j=0;j<n;j++) { 827 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 828 vals[cum] = array[j]; 829 idxs_ins[cum] = j; 830 cum++; 831 } 832 } 833 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 834 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 835 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 836 } 837 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 840 } 841 } else { /* push */ 842 if (pcbddc->benign_change_explicit) { 843 PetscInt i; 844 845 for (i=0;i<pcbddc->benign_n;i++) { 846 PetscScalar *B0_vals; 847 PetscInt *B0_cols,B0_ncol; 848 849 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 850 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 851 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 852 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 853 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 859 } 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCAdaptiveSelection" 866 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 867 { 868 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 869 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 870 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 871 PetscBLASInt *B_iwork,*B_ifail; 872 PetscScalar *work,lwork; 873 PetscScalar *St,*S,*eigv; 874 PetscScalar *Sarray,*Starray; 875 PetscReal *eigs,thresh; 876 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 877 PetscBool allocated_S_St; 878 #if defined(PETSC_USE_COMPLEX) 879 PetscReal *rwork; 880 #endif 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 if (!sub_schurs->schur_explicit) { 885 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 886 } 887 888 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 890 } 891 892 if (pcbddc->dbg_flag) { 893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 if (pcbddc->dbg_flag) { 900 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 901 } 902 903 /* max size of subsets */ 904 mss = 0; 905 for (i=0;i<sub_schurs->n_subs;i++) { 906 PetscInt subset_size; 907 908 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 909 mss = PetscMax(mss,subset_size); 910 } 911 912 /* min/max and threshold */ 913 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 914 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 915 nmax = PetscMax(nmin,nmax); 916 allocated_S_St = PETSC_FALSE; 917 if (nmin) { 918 allocated_S_St = PETSC_TRUE; 919 } 920 921 /* allocate lapack workspace */ 922 cum = cum2 = 0; 923 maxneigs = 0; 924 for (i=0;i<sub_schurs->n_subs;i++) { 925 PetscInt n,subset_size; 926 927 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 928 n = PetscMin(subset_size,nmax); 929 cum += subset_size; 930 cum2 += subset_size*n; 931 maxneigs = PetscMax(maxneigs,n); 932 } 933 if (mss) { 934 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 935 PetscBLASInt B_itype = 1; 936 PetscBLASInt B_N = mss; 937 PetscReal zero = 0.0; 938 PetscReal eps = 0.0; /* dlamch? */ 939 940 B_lwork = -1; 941 S = NULL; 942 St = NULL; 943 eigs = NULL; 944 eigv = NULL; 945 B_iwork = NULL; 946 B_ifail = NULL; 947 #if defined(PETSC_USE_COMPLEX) 948 rwork = NULL; 949 #endif 950 thresh = 1.0; 951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 952 #if defined(PETSC_USE_COMPLEX) 953 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 954 #else 955 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 956 #endif 957 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 958 ierr = PetscFPTrapPop();CHKERRQ(ierr); 959 } else { 960 /* TODO */ 961 } 962 } else { 963 lwork = 0; 964 } 965 966 nv = 0; 967 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 968 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 969 } 970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 971 if (allocated_S_St) { 972 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 973 } 974 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 975 #if defined(PETSC_USE_COMPLEX) 976 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 977 #endif 978 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 979 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 980 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 981 nv+cum,&pcbddc->adaptive_constraints_idxs, 982 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 983 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 984 985 maxneigs = 0; 986 cum = cum2 = cumarray = 0; 987 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 988 pcbddc->adaptive_constraints_data_ptr[0] = 0; 989 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 990 const PetscInt *idxs; 991 992 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 993 for (cum=0;cum<nv;cum++) { 994 pcbddc->adaptive_constraints_n[cum] = 1; 995 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 996 pcbddc->adaptive_constraints_data[cum] = 1.0; 997 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 998 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 999 } 1000 cum2 = cum; 1001 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1002 } 1003 1004 if (mss) { /* multilevel */ 1005 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1006 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1007 } 1008 1009 thresh = pcbddc->adaptive_threshold; 1010 for (i=0;i<sub_schurs->n_subs;i++) { 1011 const PetscInt *idxs; 1012 PetscReal infty = PETSC_MAX_REAL; 1013 PetscInt j,subset_size,eigs_start = 0; 1014 PetscBLASInt B_N; 1015 PetscBool same_data = PETSC_FALSE; 1016 1017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1018 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1019 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1020 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1021 if (sub_schurs->is_hermitian) { 1022 PetscInt j,k; 1023 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1024 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1025 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1026 } 1027 for (j=0;j<subset_size;j++) { 1028 for (k=j;k<subset_size;k++) { 1029 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1030 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1031 } 1032 } 1033 } else { 1034 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1035 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1036 } 1037 } else { 1038 S = Sarray + cumarray; 1039 St = Starray + cumarray; 1040 } 1041 1042 /* see if we can save some work */ 1043 if (sub_schurs->n_subs == 1) { 1044 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1045 } 1046 1047 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1048 B_neigs = 0; 1049 } else { 1050 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1051 PetscBLASInt B_itype = 1; 1052 PetscBLASInt B_IL, B_IU; 1053 PetscReal eps = -1.0; /* dlamch? */ 1054 PetscInt nmin_s; 1055 1056 if (pcbddc->dbg_flag) { 1057 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1058 } 1059 1060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1061 if (thresh > 1.+PETSC_SMALL) { 1062 1063 /* ask for eigenvalues larger than thresh */ 1064 #if defined(PETSC_USE_COMPLEX) 1065 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1066 #else 1067 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1068 #endif 1069 } else { 1070 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1071 B_IL = 1; 1072 #if defined(PETSC_USE_COMPLEX) 1073 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1074 #else 1075 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1076 #endif 1077 } 1078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1079 if (B_ierr) { 1080 if (B_ierr < 0 ) { 1081 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1082 } else if (B_ierr <= B_N) { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1084 } else { 1085 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1086 } 1087 } 1088 1089 if (B_neigs > nmax) { 1090 if (pcbddc->dbg_flag) { 1091 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1092 } 1093 eigs_start = B_neigs -nmax; 1094 B_neigs = nmax; 1095 } 1096 1097 nmin_s = PetscMin(nmin,B_N); 1098 if (B_neigs < nmin_s) { 1099 PetscBLASInt B_neigs2; 1100 1101 B_IU = B_N - B_neigs; 1102 B_IL = B_N - nmin_s + 1; 1103 if (pcbddc->dbg_flag) { 1104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1105 } 1106 if (sub_schurs->is_hermitian) { 1107 PetscInt j; 1108 for (j=0;j<subset_size;j++) { 1109 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1110 } 1111 for (j=0;j<subset_size;j++) { 1112 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1113 } 1114 } else { 1115 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1116 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1117 } 1118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_COMPLEX) 1120 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1121 #else 1122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1123 #endif 1124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1125 B_neigs += B_neigs2; 1126 } 1127 if (B_ierr) { 1128 if (B_ierr < 0 ) { 1129 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1130 } else if (B_ierr <= B_N) { 1131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1132 } else { 1133 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 } 1136 if (pcbddc->dbg_flag) { 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1138 for (j=0;j<B_neigs;j++) { 1139 if (eigs[j] == 0.0) { 1140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1141 } else { 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1143 } 1144 } 1145 } 1146 } else { 1147 /* TODO */ 1148 } 1149 } 1150 maxneigs = PetscMax(B_neigs,maxneigs); 1151 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1152 if (B_neigs) { 1153 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1154 1155 if (pcbddc->dbg_flag > 1) { 1156 PetscInt ii; 1157 for (ii=0;ii<B_neigs;ii++) { 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1159 for (j=0;j<B_N;j++) { 1160 #if defined(PETSC_USE_COMPLEX) 1161 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1162 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1166 #endif 1167 } 1168 } 1169 } 1170 #if 0 1171 for (j=0;j<B_neigs;j++) { 1172 PetscBLASInt Blas_N,Blas_one = 1.0; 1173 PetscScalar norm; 1174 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 1175 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 1176 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1177 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 1178 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1179 } else { 1180 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 1181 } 1182 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 1183 } 1184 #endif 1185 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1188 cum++; 1189 } 1190 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1191 /* shift for next computation */ 1192 cumarray += subset_size*subset_size; 1193 } 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1196 } 1197 1198 if (mss) { 1199 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1200 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1201 /* destroy matrices (junk) */ 1202 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1203 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1204 } 1205 if (allocated_S_St) { 1206 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1207 } 1208 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = PetscFree(rwork);CHKERRQ(ierr); 1211 #endif 1212 if (pcbddc->dbg_flag) { 1213 PetscInt maxneigs_r; 1214 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCSetUpSolvers" 1222 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1223 { 1224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1225 PetscScalar *coarse_submat_vals; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1230 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1231 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1232 1233 /* Setup local neumann solver ksp_R */ 1234 /* PCBDDCSetUpLocalScatters should be called first! */ 1235 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1236 1237 /* Change global null space passed in by the user if change of basis has been requested */ 1238 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1239 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1240 } 1241 1242 /* 1243 Setup local correction and local part of coarse basis. 1244 Gives back the dense local part of the coarse matrix in column major ordering 1245 */ 1246 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1247 1248 /* Compute total number of coarse nodes and setup coarse solver */ 1249 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1250 1251 /* free */ 1252 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCResetCustomization" 1258 PetscErrorCode PCBDDCResetCustomization(PC pc) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1265 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1266 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1267 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1268 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1269 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1270 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1271 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1272 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1273 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1275 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "PCBDDCResetTopography" 1281 PetscErrorCode PCBDDCResetTopography(PC pc) 1282 { 1283 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1284 PetscInt i; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1289 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1290 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1291 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1292 for (i=0;i<pcbddc->n_local_subs;i++) { 1293 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1296 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCResetSolvers" 1302 PetscErrorCode PCBDDCResetSolvers(PC pc) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1309 if (pcbddc->coarse_phi_B) { 1310 PetscScalar *array; 1311 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1312 ierr = PetscFree(array);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1316 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1317 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1318 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1319 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1320 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1321 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1322 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1328 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1329 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1330 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1332 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1333 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1335 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1336 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1338 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1341 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1342 if (pcbddc->benign_zerodiag_subs) { 1343 PetscInt i; 1344 for (i=0;i<pcbddc->benign_n;i++) { 1345 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1355 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1356 { 1357 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1358 PC_IS *pcis = (PC_IS*)pc->data; 1359 VecType impVecType; 1360 PetscInt n_constraints,n_R,old_size; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (!pcbddc->ConstraintMatrix) { 1365 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1366 } 1367 /* get sizes */ 1368 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1369 n_R = pcis->n - pcbddc->n_vertices; 1370 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1371 /* local work vectors (try to avoid unneeded work)*/ 1372 /* R nodes */ 1373 old_size = -1; 1374 if (pcbddc->vec1_R) { 1375 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1376 } 1377 if (n_R != old_size) { 1378 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1379 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1380 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1381 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1382 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1383 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1384 } 1385 /* local primal dofs */ 1386 old_size = -1; 1387 if (pcbddc->vec1_P) { 1388 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1389 } 1390 if (pcbddc->local_primal_size != old_size) { 1391 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1392 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1393 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1394 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1395 } 1396 /* local explicit constraints */ 1397 old_size = -1; 1398 if (pcbddc->vec1_C) { 1399 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1400 } 1401 if (n_constraints && n_constraints != old_size) { 1402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1403 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1404 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1405 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpCorrection" 1412 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1413 { 1414 PetscErrorCode ierr; 1415 /* pointers to pcis and pcbddc */ 1416 PC_IS* pcis = (PC_IS*)pc->data; 1417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1418 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1419 /* submatrices of local problem */ 1420 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1421 /* submatrices for benign trick */ 1422 Mat B0_R = NULL, B0_V = NULL; 1423 /* submatrices of local coarse problem */ 1424 Mat S_VV,S_CV,S_VC,S_CC; 1425 /* working matrices */ 1426 Mat C_CR; 1427 /* additional working stuff */ 1428 PC pc_R; 1429 Mat F; 1430 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1431 1432 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1433 PetscScalar *work; 1434 PetscInt *idx_V_B; 1435 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1436 PetscInt i,n_R,n_D,n_B; 1437 1438 /* some shortcuts to scalars */ 1439 PetscScalar one=1.0,m_one=-1.0; 1440 1441 PetscFunctionBegin; 1442 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1444 } 1445 1446 /* Set Non-overlapping dimensions */ 1447 n_vertices = pcbddc->n_vertices; 1448 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1449 n_B = pcis->n_B; 1450 n_D = pcis->n - n_B; 1451 n_R = pcis->n - n_vertices; 1452 1453 /* vertices in boundary numbering */ 1454 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1456 if (i != n_vertices) { 1457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1458 } 1459 1460 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1461 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1463 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1465 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1467 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1470 1471 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1474 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1475 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1476 lda_rhs = n_R; 1477 need_benign_correction = PETSC_FALSE; 1478 if (isLU || isILU || isCHOL) { 1479 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1480 } else if (sub_schurs->reuse_solver) { 1481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1482 MatFactorType type; 1483 1484 F = reuse_solver->F; 1485 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1486 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1487 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1488 need_benign_correction = !!reuse_solver->benign_n; 1489 } else { 1490 F = NULL; 1491 } 1492 1493 /* allocate workspace */ 1494 n = 0; 1495 if (n_constraints) { 1496 n += lda_rhs*n_constraints; 1497 } 1498 if (n_vertices) { 1499 n = PetscMax(2*lda_rhs*n_vertices,n); 1500 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1501 } 1502 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1503 1504 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1505 if (n_constraints) { 1506 Mat M1,M2,M3; 1507 Mat auxmat; 1508 IS is_aux; 1509 PetscScalar *array,*array2; 1510 1511 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1512 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1513 1514 /* Extract constraints on R nodes: C_{CR} */ 1515 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1516 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1517 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1518 1519 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1520 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1521 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1522 for (i=0;i<n_constraints;i++) { 1523 const PetscScalar *row_cmat_values; 1524 const PetscInt *row_cmat_indices; 1525 PetscInt size_of_constraint,j; 1526 1527 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1528 for (j=0;j<size_of_constraint;j++) { 1529 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1530 } 1531 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1532 } 1533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1534 if (F) { 1535 Mat B; 1536 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1538 if (need_benign_correction) { 1539 PetscScalar *marr; 1540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1541 1542 ierr = MatDenseGetArray(B,&marr);CHKERRQ(ierr); 1543 for (i=0;i<n_constraints;i++) { 1544 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1545 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1547 } 1548 ierr = MatDenseRestoreArray(B,&marr);CHKERRQ(ierr); 1549 } 1550 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1551 if (need_benign_correction) { 1552 PetscScalar *marr; 1553 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1554 1555 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1556 for (i=0;i<n_constraints;i++) { 1557 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1558 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1562 } 1563 ierr = MatDestroy(&B);CHKERRQ(ierr); 1564 } else { 1565 PetscScalar *marr; 1566 1567 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1568 for (i=0;i<n_constraints;i++) { 1569 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1574 } 1575 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->switch_static) { 1578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1580 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1581 for (i=0;i<n_constraints;i++) { 1582 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1583 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1588 } 1589 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1590 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1591 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1592 } else { 1593 if (lda_rhs != n_R) { 1594 IS dummy; 1595 1596 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1598 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1601 pcbddc->local_auxmat2 = local_auxmat2_R; 1602 } 1603 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1604 } 1605 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1606 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1607 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1608 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1609 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1610 if (isCHOL) { 1611 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1612 } else { 1613 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1614 } 1615 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1616 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1618 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1619 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1620 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1621 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1622 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1623 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1624 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1625 } 1626 1627 /* Extract B0_R */ 1628 if (pcbddc->benign_n) { 1629 IS dummy; 1630 PetscReal norm; 1631 1632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1634 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1635 if (norm < PETSC_SMALL) { 1636 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1637 } 1638 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1639 } 1640 1641 /* Get submatrices from subdomain matrix */ 1642 if (n_vertices) { 1643 IS is_aux; 1644 1645 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1646 IS tis; 1647 1648 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1649 ierr = ISSort(tis);CHKERRQ(ierr); 1650 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 } else { 1653 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1654 } 1655 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1656 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1657 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1658 if (pcbddc->benign_n) { 1659 IS dummy; 1660 1661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1662 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1663 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1664 } 1665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1666 } 1667 1668 /* Matrix of coarse basis functions (local) */ 1669 if (pcbddc->coarse_phi_B) { 1670 PetscInt on_B,on_primal,on_D=n_D; 1671 if (pcbddc->coarse_phi_D) { 1672 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1673 } 1674 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1675 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1676 PetscScalar *marray; 1677 1678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1679 ierr = PetscFree(marray);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1681 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1684 } 1685 } 1686 1687 if (!pcbddc->coarse_phi_B) { 1688 PetscScalar *marray; 1689 1690 n = n_B*pcbddc->local_primal_size; 1691 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1692 n += n_D*pcbddc->local_primal_size; 1693 } 1694 if (!pcbddc->symmetric_primal) { 1695 n *= 2; 1696 } 1697 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1699 n = n_B*pcbddc->local_primal_size; 1700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1702 n += n_D*pcbddc->local_primal_size; 1703 } 1704 if (!pcbddc->symmetric_primal) { 1705 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 n = n_B*pcbddc->local_primal_size; 1708 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 } 1710 } else { 1711 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1714 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1715 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1716 } 1717 } 1718 } 1719 1720 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1721 p0_lidx_I = NULL; 1722 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1723 const PetscInt *idxs; 1724 1725 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1727 for (i=0;i<pcbddc->benign_n;i++) { 1728 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1729 } 1730 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1731 } 1732 1733 /* vertices */ 1734 if (n_vertices) { 1735 1736 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1737 1738 if (n_R) { 1739 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1740 PetscBLASInt B_N,B_one = 1; 1741 PetscScalar *x,*y; 1742 PetscBool isseqaij; 1743 1744 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1745 if (need_benign_correction) { 1746 ISLocalToGlobalMapping RtoN; 1747 IS is_p0; 1748 PetscInt *idxs_p0,n; 1749 1750 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1751 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1752 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1753 if (n != pcbddc->benign_n) { 1754 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1755 } 1756 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1757 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1758 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1759 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1760 } 1761 1762 if (lda_rhs == n_R) { 1763 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1764 } else { 1765 PetscScalar *av,*array; 1766 const PetscInt *xadj,*adjncy; 1767 PetscInt n; 1768 PetscBool flg_row; 1769 1770 array = work+lda_rhs*n_vertices; 1771 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1772 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1774 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1775 for (i=0;i<n;i++) { 1776 PetscInt j; 1777 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1778 } 1779 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1782 } 1783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1784 if (F) { 1785 if (need_benign_correction) { 1786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1787 PetscScalar *marr; 1788 1789 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1790 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1791 1792 | 0 0 0 | (V) 1793 L = | 0 0 -1 | (P-p0) 1794 | 0 0 -1 | (p0) 1795 1796 */ 1797 for (i=0;i<reuse_solver->benign_n;i++) { 1798 const PetscScalar *vals; 1799 const PetscInt *idxs,*idxs_zero; 1800 PetscInt n,j,nz; 1801 1802 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1804 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1805 for (j=0;j<n;j++) { 1806 PetscScalar val = vals[j]; 1807 PetscInt k,col = idxs[j]; 1808 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1809 } 1810 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1812 } 1813 /* need to correct the rhs */ 1814 for (i=0;i<n_vertices;i++) { 1815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1816 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE);CHKERRQ(ierr); 1817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1818 } 1819 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1820 } 1821 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1822 /* need to correct the solution */ 1823 if (need_benign_correction) { 1824 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1825 PetscScalar *marr; 1826 1827 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1828 for (i=0;i<n_vertices;i++) { 1829 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1830 ierr = PCBDDCReuseSolversChangeInterior(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1832 } 1833 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1834 } 1835 } else { 1836 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1837 for (i=0;i<n_vertices;i++) { 1838 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1840 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1842 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1843 } 1844 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1845 } 1846 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1847 /* S_VV and S_CV */ 1848 if (n_constraints) { 1849 Mat B; 1850 1851 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1852 for (i=0;i<n_vertices;i++) { 1853 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1854 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1859 } 1860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1861 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1862 ierr = MatDestroy(&B);CHKERRQ(ierr); 1863 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1864 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1865 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1868 ierr = MatDestroy(&B);CHKERRQ(ierr); 1869 } 1870 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1871 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1872 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1873 } 1874 if (lda_rhs != n_R) { 1875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1878 } 1879 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1880 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1881 if (need_benign_correction) { 1882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1883 PetscScalar *marr,*sums; 1884 1885 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1886 ierr = MatDenseGetArray(S_VVt,&marr); 1887 for (i=0;i<reuse_solver->benign_n;i++) { 1888 const PetscScalar *vals; 1889 const PetscInt *idxs,*idxs_zero; 1890 PetscInt n,j,nz; 1891 1892 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1893 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1894 for (j=0;j<n_vertices;j++) { 1895 PetscInt k; 1896 sums[j] = 0.; 1897 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1898 } 1899 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1900 for (j=0;j<n;j++) { 1901 PetscScalar val = vals[j]; 1902 PetscInt k; 1903 for (k=0;k<n_vertices;k++) { 1904 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1905 } 1906 } 1907 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1908 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1909 } 1910 ierr = PetscFree(sums);CHKERRQ(ierr); 1911 ierr = MatDenseRestoreArray(S_VVt,&marr); 1912 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1913 } 1914 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1919 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1920 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1921 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1922 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1925 } 1926 for (i=0;i<pcbddc->benign_n;i++) { 1927 const PetscScalar *vals; 1928 const PetscInt *idxs; 1929 PetscInt n,j,primal_idx; 1930 1931 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1932 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1933 for (j=0;j<n;j++) { 1934 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1935 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1936 } 1937 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1938 } 1939 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1940 1941 /* coarse basis functions */ 1942 for (i=0;i<n_vertices;i++) { 1943 PetscScalar *y; 1944 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1946 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1947 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1950 y[n_B*i+idx_V_B[i]] = 1.0; 1951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1952 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1953 1954 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1955 PetscInt j; 1956 1957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1962 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1964 } 1965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1966 } 1967 /* if n_R == 0 the object is not destroyed */ 1968 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1969 } 1970 1971 if (n_constraints) { 1972 Mat B; 1973 1974 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1975 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1976 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1977 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1978 if (n_vertices) { 1979 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1980 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1981 } else { 1982 Mat S_VCt; 1983 1984 if (lda_rhs != n_R) { 1985 ierr = MatDestroy(&B);CHKERRQ(ierr); 1986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1987 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1988 } 1989 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1992 } 1993 } 1994 ierr = MatDestroy(&B);CHKERRQ(ierr); 1995 /* coarse basis functions */ 1996 for (i=0;i<n_constraints;i++) { 1997 PetscScalar *y; 1998 1999 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2000 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2006 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2007 PetscInt j; 2008 2009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2011 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2014 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2016 } 2017 if (B0_R) { 2018 Vec benign_proj; 2019 const PetscScalar *array; 2020 PetscInt j; 2021 2022 ierr = MatCreateVecs(B0_R,NULL,&benign_proj);CHKERRQ(ierr); 2023 ierr = MatMult(B0_R,pcbddc->vec1_R,benign_proj);CHKERRQ(ierr); 2024 ierr = VecGetArrayRead(benign_proj,&array);CHKERRQ(ierr); 2025 for (j=0;j<pcbddc->benign_n;j++) { 2026 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2027 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i+n_vertices] = array[j]; 2028 coarse_submat_vals[(i+n_vertices)*pcbddc->local_primal_size+primal_idx] = array[j]; 2029 } 2030 ierr = VecRestoreArrayRead(benign_proj,&array);CHKERRQ(ierr); 2031 ierr = VecDestroy(&benign_proj);CHKERRQ(ierr); 2032 } 2033 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2034 } 2035 } 2036 if (n_constraints) { 2037 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2038 } 2039 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2040 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 2041 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 2042 2043 /* compute other basis functions for non-symmetric problems */ 2044 if (!pcbddc->symmetric_primal) { 2045 Mat B_V=NULL,B_C=NULL; 2046 PetscScalar *marray; 2047 2048 if (n_constraints) { 2049 Mat S_CCT,C_CRT; 2050 2051 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2052 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2053 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2054 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2055 if (n_vertices) { 2056 Mat S_VCT; 2057 2058 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2059 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2060 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2061 } 2062 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2063 } 2064 if (n_vertices && n_R) { 2065 PetscScalar *av,*marray; 2066 const PetscInt *xadj,*adjncy; 2067 PetscInt n; 2068 PetscBool flg_row; 2069 2070 /* B_V = B_V - A_VR^T */ 2071 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2072 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2073 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2074 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2075 for (i=0;i<n;i++) { 2076 PetscInt j; 2077 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2078 } 2079 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2080 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2081 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2082 } 2083 2084 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2085 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2086 for (i=0;i<n_vertices;i++) { 2087 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2088 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2089 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2091 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2092 } 2093 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2094 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2095 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2096 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2097 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2098 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2100 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2101 } 2102 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2103 /* coarse basis functions */ 2104 for (i=0;i<pcbddc->local_primal_size;i++) { 2105 PetscScalar *y; 2106 2107 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2108 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2109 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2110 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 if (i<n_vertices) { 2113 y[n_B*i+idx_V_B[i]] = 1.0; 2114 } 2115 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2116 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2117 2118 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2119 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2120 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2121 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2123 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2124 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2125 } 2126 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2127 } 2128 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2129 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2130 } 2131 /* free memory */ 2132 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2133 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2134 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2135 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2136 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2137 ierr = PetscFree(work);CHKERRQ(ierr); 2138 if (n_vertices) { 2139 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2140 } 2141 if (n_constraints) { 2142 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2143 } 2144 /* Checking coarse_sub_mat and coarse basis functios */ 2145 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2146 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2147 if (pcbddc->dbg_flag) { 2148 Mat coarse_sub_mat; 2149 Mat AUXMAT,TM1,TM2,TM3,TM4; 2150 Mat coarse_phi_D,coarse_phi_B; 2151 Mat coarse_psi_D,coarse_psi_B; 2152 Mat A_II,A_BB,A_IB,A_BI; 2153 Mat C_B,CPHI; 2154 IS is_dummy; 2155 Vec mones; 2156 MatType checkmattype=MATSEQAIJ; 2157 PetscReal real_value; 2158 2159 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2160 Mat A; 2161 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2162 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2163 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2164 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2165 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2166 ierr = MatDestroy(&A);CHKERRQ(ierr); 2167 } else { 2168 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2169 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2170 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2171 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2172 } 2173 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2174 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2175 if (!pcbddc->symmetric_primal) { 2176 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2177 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2178 } 2179 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2180 2181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2184 if (!pcbddc->symmetric_primal) { 2185 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2186 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2187 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2188 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2189 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2190 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2191 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2192 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2193 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2194 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2195 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2196 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2199 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2200 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2201 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2202 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2203 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2204 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2205 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2206 } 2207 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2208 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2209 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2210 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2211 if (pcbddc->benign_n) { 2212 Mat B0_B,B0_BPHI; 2213 PetscScalar *data,*data2; 2214 PetscInt j; 2215 2216 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2217 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2218 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2219 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2220 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2221 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2222 for (j=0;j<pcbddc->benign_n;j++) { 2223 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2224 for (i=0;i<pcbddc->local_primal_size;i++) { 2225 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2226 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2227 } 2228 } 2229 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2230 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2231 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2232 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2234 } 2235 #if 0 2236 { 2237 PetscViewer viewer; 2238 char filename[256]; 2239 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2240 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2241 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2242 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2243 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2244 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2245 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2247 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2248 if (pcbddc->coarse_phi_D) { 2249 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2250 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2251 } 2252 if (pcbddc->coarse_psi_B) { 2253 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2254 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2255 } 2256 if (pcbddc->coarse_phi_B) { 2257 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2258 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2259 } 2260 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2261 } 2262 #endif 2263 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2264 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2267 2268 /* check constraints */ 2269 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2270 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2271 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2272 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2273 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2274 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2275 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2276 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2278 if (!pcbddc->symmetric_primal) { 2279 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2280 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2281 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2282 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2284 } 2285 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2286 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2287 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2288 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2289 } 2290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2291 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2292 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2293 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2294 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2295 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2296 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2297 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2298 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2299 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2300 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2301 if (!pcbddc->symmetric_primal) { 2302 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2303 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2304 } 2305 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2306 } 2307 /* get back data */ 2308 *coarse_submat_vals_n = coarse_submat_vals; 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2314 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2315 { 2316 Mat *work_mat; 2317 IS isrow_s,iscol_s; 2318 PetscBool rsorted,csorted; 2319 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2324 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2325 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2326 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2327 2328 if (!rsorted) { 2329 const PetscInt *idxs; 2330 PetscInt *idxs_sorted,i; 2331 2332 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2333 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2334 for (i=0;i<rsize;i++) { 2335 idxs_perm_r[i] = i; 2336 } 2337 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2338 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2339 for (i=0;i<rsize;i++) { 2340 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2341 } 2342 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2343 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2344 } else { 2345 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2346 isrow_s = isrow; 2347 } 2348 2349 if (!csorted) { 2350 if (isrow == iscol) { 2351 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2352 iscol_s = isrow_s; 2353 } else { 2354 const PetscInt *idxs; 2355 PetscInt *idxs_sorted,i; 2356 2357 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2358 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2359 for (i=0;i<csize;i++) { 2360 idxs_perm_c[i] = i; 2361 } 2362 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2363 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2364 for (i=0;i<csize;i++) { 2365 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2366 } 2367 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2368 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2369 } 2370 } else { 2371 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2372 iscol_s = iscol; 2373 } 2374 2375 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2376 2377 if (!rsorted || !csorted) { 2378 Mat new_mat; 2379 IS is_perm_r,is_perm_c; 2380 2381 if (!rsorted) { 2382 PetscInt *idxs_r,i; 2383 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2384 for (i=0;i<rsize;i++) { 2385 idxs_r[idxs_perm_r[i]] = i; 2386 } 2387 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2388 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2389 } else { 2390 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2391 } 2392 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2393 2394 if (!csorted) { 2395 if (isrow_s == iscol_s) { 2396 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2397 is_perm_c = is_perm_r; 2398 } else { 2399 PetscInt *idxs_c,i; 2400 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2401 for (i=0;i<csize;i++) { 2402 idxs_c[idxs_perm_c[i]] = i; 2403 } 2404 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2405 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2406 } 2407 } else { 2408 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2409 } 2410 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2411 2412 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2413 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2414 work_mat[0] = new_mat; 2415 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2416 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2417 } 2418 2419 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2420 *B = work_mat[0]; 2421 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2422 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2423 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 #undef __FUNCT__ 2428 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2429 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2430 { 2431 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2432 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2433 Mat new_mat; 2434 IS is_local,is_global; 2435 PetscInt local_size; 2436 PetscBool isseqaij; 2437 PetscErrorCode ierr; 2438 2439 PetscFunctionBegin; 2440 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2441 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2442 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2443 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2444 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2445 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2446 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2447 2448 /* check */ 2449 if (pcbddc->dbg_flag) { 2450 Vec x,x_change; 2451 PetscReal error; 2452 2453 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2454 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2455 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2456 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2457 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2458 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2459 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2460 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2461 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2462 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2464 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2465 ierr = VecDestroy(&x);CHKERRQ(ierr); 2466 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2467 } 2468 2469 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2470 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2471 if (isseqaij) { 2472 Mat M; 2473 2474 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2475 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2476 ierr = MatDestroy(&M);CHKERRQ(ierr); 2477 } else { 2478 Mat work_mat,M; 2479 2480 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2481 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2482 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2483 ierr = MatDestroy(&M);CHKERRQ(ierr); 2484 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2485 } 2486 if (matis->A->symmetric_set) { 2487 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2488 #if !defined(PETSC_USE_COMPLEX) 2489 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2490 #endif 2491 } 2492 /* 2493 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2494 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2495 */ 2496 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2502 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2503 { 2504 PC_IS* pcis = (PC_IS*)(pc->data); 2505 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2506 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2507 PetscInt *idx_R_local=NULL; 2508 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2509 PetscInt vbs,bs; 2510 PetscBT bitmask=NULL; 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 /* 2515 No need to setup local scatters if 2516 - primal space is unchanged 2517 AND 2518 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2519 AND 2520 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2521 */ 2522 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2523 PetscFunctionReturn(0); 2524 } 2525 /* destroy old objects */ 2526 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2527 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2528 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2529 /* Set Non-overlapping dimensions */ 2530 n_B = pcis->n_B; 2531 n_D = pcis->n - n_B; 2532 n_vertices = pcbddc->n_vertices; 2533 2534 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2535 2536 /* create auxiliary bitmask and allocate workspace */ 2537 if (!sub_schurs->reuse_solver) { 2538 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2539 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2540 for (i=0;i<n_vertices;i++) { 2541 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2542 } 2543 2544 for (i=0, n_R=0; i<pcis->n; i++) { 2545 if (!PetscBTLookup(bitmask,i)) { 2546 idx_R_local[n_R++] = i; 2547 } 2548 } 2549 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2550 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2551 2552 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2553 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2554 } 2555 2556 /* Block code */ 2557 vbs = 1; 2558 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2559 if (bs>1 && !(n_vertices%bs)) { 2560 PetscBool is_blocked = PETSC_TRUE; 2561 PetscInt *vary; 2562 if (!sub_schurs->reuse_solver) { 2563 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2564 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2565 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2566 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2567 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2568 for (i=0; i<pcis->n/bs; i++) { 2569 if (vary[i]!=0 && vary[i]!=bs) { 2570 is_blocked = PETSC_FALSE; 2571 break; 2572 } 2573 } 2574 ierr = PetscFree(vary);CHKERRQ(ierr); 2575 } else { 2576 /* Verify directly the R set */ 2577 for (i=0; i<n_R/bs; i++) { 2578 PetscInt j,node=idx_R_local[bs*i]; 2579 for (j=1; j<bs; j++) { 2580 if (node != idx_R_local[bs*i+j]-j) { 2581 is_blocked = PETSC_FALSE; 2582 break; 2583 } 2584 } 2585 } 2586 } 2587 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2588 vbs = bs; 2589 for (i=0;i<n_R/vbs;i++) { 2590 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2591 } 2592 } 2593 } 2594 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2595 if (sub_schurs->reuse_solver) { 2596 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2597 2598 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2599 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2600 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2601 reuse_solver->is_R = pcbddc->is_R_local; 2602 } else { 2603 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2604 } 2605 2606 /* print some info if requested */ 2607 if (pcbddc->dbg_flag) { 2608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2610 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2615 } 2616 2617 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2618 if (!sub_schurs->reuse_solver) { 2619 IS is_aux1,is_aux2; 2620 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2621 2622 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2623 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2624 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2625 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2626 for (i=0; i<n_D; i++) { 2627 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2628 } 2629 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2630 for (i=0, j=0; i<n_R; i++) { 2631 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2632 aux_array1[j++] = i; 2633 } 2634 } 2635 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2636 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2637 for (i=0, j=0; i<n_B; i++) { 2638 if (!PetscBTLookup(bitmask,is_indices[i])) { 2639 aux_array2[j++] = i; 2640 } 2641 } 2642 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2643 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2644 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2646 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2647 2648 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2649 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2650 for (i=0, j=0; i<n_R; i++) { 2651 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2652 aux_array1[j++] = i; 2653 } 2654 } 2655 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2657 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2658 } 2659 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2660 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2661 } else { 2662 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2663 IS tis; 2664 PetscInt schur_size; 2665 2666 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2667 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2668 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2669 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2670 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2671 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2672 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2673 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2674 } 2675 } 2676 PetscFunctionReturn(0); 2677 } 2678 2679 2680 #undef __FUNCT__ 2681 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2682 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2683 { 2684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2685 PC_IS *pcis = (PC_IS*)pc->data; 2686 PC pc_temp; 2687 Mat A_RR; 2688 MatReuse reuse; 2689 PetscScalar m_one = -1.0; 2690 PetscReal value; 2691 PetscInt n_D,n_R; 2692 PetscBool use_exact,use_exact_reduced,issbaij; 2693 PetscErrorCode ierr; 2694 /* prefixes stuff */ 2695 char dir_prefix[256],neu_prefix[256],str_level[16]; 2696 size_t len; 2697 2698 PetscFunctionBegin; 2699 2700 /* compute prefixes */ 2701 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2702 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2703 if (!pcbddc->current_level) { 2704 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2705 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2706 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2707 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2708 } else { 2709 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2710 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2711 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2712 len -= 15; /* remove "pc_bddc_coarse_" */ 2713 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2714 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2715 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2716 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2717 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2718 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2719 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2720 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2721 } 2722 2723 /* DIRICHLET PROBLEM */ 2724 if (dirichlet) { 2725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2726 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2727 if (!sub_schurs->reuse_solver) { 2728 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2729 } 2730 if (pcbddc->dbg_flag) { 2731 Mat A_IIn; 2732 2733 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2734 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2735 pcis->A_II = A_IIn; 2736 } 2737 } 2738 if (pcbddc->local_mat->symmetric_set) { 2739 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2740 } 2741 /* Matrix for Dirichlet problem is pcis->A_II */ 2742 n_D = pcis->n - pcis->n_B; 2743 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2744 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2745 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2746 /* default */ 2747 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2748 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2749 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2750 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2751 if (issbaij) { 2752 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2753 } else { 2754 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2755 } 2756 /* Allow user's customization */ 2757 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2758 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2759 } 2760 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2761 if (sub_schurs->reuse_solver) { 2762 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2763 2764 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2765 } 2766 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2767 if (!n_D) { 2768 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2769 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2770 } 2771 /* Set Up KSP for Dirichlet problem of BDDC */ 2772 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2773 /* set ksp_D into pcis data */ 2774 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2775 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2776 pcis->ksp_D = pcbddc->ksp_D; 2777 } 2778 2779 /* NEUMANN PROBLEM */ 2780 A_RR = 0; 2781 if (neumann) { 2782 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2783 PetscInt ibs,mbs; 2784 PetscBool issbaij; 2785 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2786 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2787 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2788 if (pcbddc->ksp_R) { /* already created ksp */ 2789 PetscInt nn_R; 2790 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2791 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2792 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2793 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2794 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2795 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2796 reuse = MAT_INITIAL_MATRIX; 2797 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2798 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2799 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2800 reuse = MAT_INITIAL_MATRIX; 2801 } else { /* safe to reuse the matrix */ 2802 reuse = MAT_REUSE_MATRIX; 2803 } 2804 } 2805 /* last check */ 2806 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2807 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2808 reuse = MAT_INITIAL_MATRIX; 2809 } 2810 } else { /* first time, so we need to create the matrix */ 2811 reuse = MAT_INITIAL_MATRIX; 2812 } 2813 /* extract A_RR */ 2814 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2815 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2816 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2817 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2818 if (matis->A == pcbddc->local_mat) { 2819 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2820 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2821 } else { 2822 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2823 } 2824 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2825 if (matis->A == pcbddc->local_mat) { 2826 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2827 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2828 } else { 2829 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2830 } 2831 } 2832 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2833 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2834 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2835 } else { 2836 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2837 } 2838 if (pcbddc->local_mat->symmetric_set) { 2839 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2840 } 2841 if (!pcbddc->ksp_R) { /* create object if not present */ 2842 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2843 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2844 /* default */ 2845 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2846 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2847 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2848 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2849 if (issbaij) { 2850 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2851 } else { 2852 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2853 } 2854 /* Allow user's customization */ 2855 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2856 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2857 } 2858 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2859 if (!n_R) { 2860 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2861 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2862 } 2863 /* Reuse solver if it is present */ 2864 if (sub_schurs->reuse_solver) { 2865 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2866 2867 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2868 } 2869 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2870 /* Set Up KSP for Neumann problem of BDDC */ 2871 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2872 } 2873 /* free Neumann problem's matrix */ 2874 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2875 2876 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2877 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2878 if (pcbddc->dbg_flag) { 2879 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2880 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2881 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2882 } 2883 if (dirichlet) { /* Dirichlet */ 2884 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2885 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2886 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2887 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2888 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2889 /* need to be adapted? */ 2890 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2891 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2892 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2893 /* print info */ 2894 if (pcbddc->dbg_flag) { 2895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2897 } 2898 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2899 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2900 } 2901 } 2902 if (neumann) { /* Neumann */ 2903 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2904 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2905 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2906 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2907 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2908 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2909 /* need to be adapted? */ 2910 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2911 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2912 /* print info */ 2913 if (pcbddc->dbg_flag) { 2914 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2916 } 2917 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2918 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2919 } 2920 } 2921 } 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2927 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2928 { 2929 PetscErrorCode ierr; 2930 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2931 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2932 2933 PetscFunctionBegin; 2934 if (!sub_schurs->reuse_solver) { 2935 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2936 } 2937 if (!pcbddc->switch_static) { 2938 if (applytranspose && pcbddc->local_auxmat1) { 2939 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2940 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2941 } 2942 if (!sub_schurs->reuse_solver) { 2943 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2944 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 } else { 2946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2947 2948 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950 } 2951 } else { 2952 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2953 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2955 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2956 if (applytranspose && pcbddc->local_auxmat1) { 2957 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2958 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2959 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2960 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2961 } 2962 } 2963 if (!sub_schurs->reuse_solver) { 2964 if (applytranspose) { 2965 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2966 } else { 2967 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2968 } 2969 } else { 2970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2971 2972 if (applytranspose) { 2973 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2974 } else { 2975 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 2976 } 2977 } 2978 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2979 if (!pcbddc->switch_static) { 2980 if (!sub_schurs->reuse_solver) { 2981 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2982 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2983 } else { 2984 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2985 2986 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2987 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2988 } 2989 if (!applytranspose && pcbddc->local_auxmat1) { 2990 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2991 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2992 } 2993 } else { 2994 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2995 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2996 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2997 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2998 if (!applytranspose && pcbddc->local_auxmat1) { 2999 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3000 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3001 } 3002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3005 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 } 3007 PetscFunctionReturn(0); 3008 } 3009 3010 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3013 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3014 { 3015 PetscErrorCode ierr; 3016 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3017 PC_IS* pcis = (PC_IS*) (pc->data); 3018 const PetscScalar zero = 0.0; 3019 3020 PetscFunctionBegin; 3021 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3022 if (!pcbddc->benign_apply_coarse_only) { 3023 if (applytranspose) { 3024 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3025 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3026 } else { 3027 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3028 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3029 } 3030 } else { 3031 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3032 } 3033 3034 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3035 if (pcbddc->benign_n) { 3036 PetscScalar *array; 3037 PetscInt j; 3038 3039 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3040 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3041 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3042 } 3043 3044 /* start communications from local primal nodes to rhs of coarse solver */ 3045 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3046 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3047 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3048 3049 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3050 /* TODO remove null space when doing multilevel */ 3051 if (pcbddc->coarse_ksp) { 3052 Mat coarse_mat; 3053 Vec rhs,sol; 3054 MatNullSpace nullsp; 3055 3056 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3057 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3058 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3059 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3060 if (nullsp) { 3061 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3062 } 3063 if (applytranspose) { 3064 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3065 } else { 3066 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3067 } 3068 if (nullsp) { 3069 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3070 } 3071 } 3072 3073 /* Local solution on R nodes */ 3074 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3075 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3076 } 3077 /* communications from coarse sol to local primal nodes */ 3078 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3079 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3080 3081 /* Sum contributions from the two levels */ 3082 if (!pcbddc->benign_apply_coarse_only) { 3083 if (applytranspose) { 3084 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3085 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3086 } else { 3087 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3088 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3089 } 3090 /* store p0 */ 3091 if (pcbddc->benign_n) { 3092 PetscScalar *array; 3093 PetscInt j; 3094 3095 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3096 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3097 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3098 } 3099 } else { /* expand the coarse solution */ 3100 if (applytranspose) { 3101 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3102 } else { 3103 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3104 } 3105 } 3106 PetscFunctionReturn(0); 3107 } 3108 3109 #undef __FUNCT__ 3110 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3111 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3112 { 3113 PetscErrorCode ierr; 3114 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3115 PetscScalar *array; 3116 Vec from,to; 3117 3118 PetscFunctionBegin; 3119 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3120 from = pcbddc->coarse_vec; 3121 to = pcbddc->vec1_P; 3122 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3123 Vec tvec; 3124 3125 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3126 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3127 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3128 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3129 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3130 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3131 } 3132 } else { /* from local to global -> put data in coarse right hand side */ 3133 from = pcbddc->vec1_P; 3134 to = pcbddc->coarse_vec; 3135 } 3136 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3137 PetscFunctionReturn(0); 3138 } 3139 3140 #undef __FUNCT__ 3141 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3142 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3143 { 3144 PetscErrorCode ierr; 3145 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3146 PetscScalar *array; 3147 Vec from,to; 3148 3149 PetscFunctionBegin; 3150 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3151 from = pcbddc->coarse_vec; 3152 to = pcbddc->vec1_P; 3153 } else { /* from local to global -> put data in coarse right hand side */ 3154 from = pcbddc->vec1_P; 3155 to = pcbddc->coarse_vec; 3156 } 3157 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3158 if (smode == SCATTER_FORWARD) { 3159 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3160 Vec tvec; 3161 3162 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3163 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3164 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3165 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3166 } 3167 } else { 3168 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3169 ierr = VecResetArray(from);CHKERRQ(ierr); 3170 } 3171 } 3172 PetscFunctionReturn(0); 3173 } 3174 3175 /* uncomment for testing purposes */ 3176 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3179 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3180 { 3181 PetscErrorCode ierr; 3182 PC_IS* pcis = (PC_IS*)(pc->data); 3183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3184 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3185 /* one and zero */ 3186 PetscScalar one=1.0,zero=0.0; 3187 /* space to store constraints and their local indices */ 3188 PetscScalar *constraints_data; 3189 PetscInt *constraints_idxs,*constraints_idxs_B; 3190 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3191 PetscInt *constraints_n; 3192 /* iterators */ 3193 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3194 /* BLAS integers */ 3195 PetscBLASInt lwork,lierr; 3196 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3197 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3198 /* reuse */ 3199 PetscInt olocal_primal_size,olocal_primal_size_cc; 3200 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3201 /* change of basis */ 3202 PetscBool qr_needed; 3203 PetscBT change_basis,qr_needed_idx; 3204 /* auxiliary stuff */ 3205 PetscInt *nnz,*is_indices; 3206 PetscInt ncc; 3207 /* some quantities */ 3208 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3209 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3210 3211 PetscFunctionBegin; 3212 /* Destroy Mat objects computed previously */ 3213 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3214 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3215 /* save info on constraints from previous setup (if any) */ 3216 olocal_primal_size = pcbddc->local_primal_size; 3217 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3218 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3219 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3220 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3221 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3222 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3223 3224 /* print some info */ 3225 if (pcbddc->dbg_flag) { 3226 IS vertices; 3227 PetscInt nv,nedges,nfaces; 3228 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3229 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3230 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3231 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3237 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3238 } 3239 3240 if (!pcbddc->adaptive_selection) { 3241 IS ISForVertices,*ISForFaces,*ISForEdges; 3242 MatNullSpace nearnullsp; 3243 const Vec *nearnullvecs; 3244 Vec *localnearnullsp; 3245 PetscScalar *array; 3246 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3247 PetscBool nnsp_has_cnst; 3248 /* LAPACK working arrays for SVD or POD */ 3249 PetscBool skip_lapack,boolforchange; 3250 PetscScalar *work; 3251 PetscReal *singular_vals; 3252 #if defined(PETSC_USE_COMPLEX) 3253 PetscReal *rwork; 3254 #endif 3255 #if defined(PETSC_MISSING_LAPACK_GESVD) 3256 PetscScalar *temp_basis,*correlation_mat; 3257 #else 3258 PetscBLASInt dummy_int=1; 3259 PetscScalar dummy_scalar=1.; 3260 #endif 3261 3262 /* Get index sets for faces, edges and vertices from graph */ 3263 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3264 /* free unneeded index sets */ 3265 if (!pcbddc->use_vertices) { 3266 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3267 } 3268 if (!pcbddc->use_edges) { 3269 for (i=0;i<n_ISForEdges;i++) { 3270 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3271 } 3272 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3273 n_ISForEdges = 0; 3274 } 3275 if (!pcbddc->use_faces) { 3276 for (i=0;i<n_ISForFaces;i++) { 3277 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3278 } 3279 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3280 n_ISForFaces = 0; 3281 } 3282 3283 #if defined(PETSC_USE_DEBUG) 3284 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3285 Also use_change_of_basis should be consistent among processors */ 3286 if (pcbddc->NullSpace) { 3287 PetscBool tbool[2],gbool[2]; 3288 3289 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3290 pcbddc->use_change_of_basis = PETSC_TRUE; 3291 if (!ISForEdges) { 3292 pcbddc->use_change_on_faces = PETSC_TRUE; 3293 } 3294 } 3295 tbool[0] = pcbddc->use_change_of_basis; 3296 tbool[1] = pcbddc->use_change_on_faces; 3297 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3298 pcbddc->use_change_of_basis = gbool[0]; 3299 pcbddc->use_change_on_faces = gbool[1]; 3300 } 3301 #endif 3302 3303 /* check if near null space is attached to global mat */ 3304 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3305 if (nearnullsp) { 3306 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3307 /* remove any stored info */ 3308 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3309 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3310 /* store information for BDDC solver reuse */ 3311 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3312 pcbddc->onearnullspace = nearnullsp; 3313 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3314 for (i=0;i<nnsp_size;i++) { 3315 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3316 } 3317 } else { /* if near null space is not provided BDDC uses constants by default */ 3318 nnsp_size = 0; 3319 nnsp_has_cnst = PETSC_TRUE; 3320 } 3321 /* get max number of constraints on a single cc */ 3322 max_constraints = nnsp_size; 3323 if (nnsp_has_cnst) max_constraints++; 3324 3325 /* 3326 Evaluate maximum storage size needed by the procedure 3327 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3328 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3329 There can be multiple constraints per connected component 3330 */ 3331 n_vertices = 0; 3332 if (ISForVertices) { 3333 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3334 } 3335 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3336 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3337 3338 total_counts = n_ISForFaces+n_ISForEdges; 3339 total_counts *= max_constraints; 3340 total_counts += n_vertices; 3341 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3342 3343 total_counts = 0; 3344 max_size_of_constraint = 0; 3345 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3346 IS used_is; 3347 if (i<n_ISForEdges) { 3348 used_is = ISForEdges[i]; 3349 } else { 3350 used_is = ISForFaces[i-n_ISForEdges]; 3351 } 3352 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3353 total_counts += j; 3354 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3355 } 3356 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3357 3358 /* get local part of global near null space vectors */ 3359 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3360 for (k=0;k<nnsp_size;k++) { 3361 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3362 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3363 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3364 } 3365 3366 /* whether or not to skip lapack calls */ 3367 skip_lapack = PETSC_TRUE; 3368 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3369 3370 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3371 if (!skip_lapack) { 3372 PetscScalar temp_work; 3373 3374 #if defined(PETSC_MISSING_LAPACK_GESVD) 3375 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3376 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3377 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3378 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3379 #if defined(PETSC_USE_COMPLEX) 3380 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3381 #endif 3382 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3383 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3384 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3385 lwork = -1; 3386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3387 #if !defined(PETSC_USE_COMPLEX) 3388 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3389 #else 3390 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3391 #endif 3392 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3393 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3394 #else /* on missing GESVD */ 3395 /* SVD */ 3396 PetscInt max_n,min_n; 3397 max_n = max_size_of_constraint; 3398 min_n = max_constraints; 3399 if (max_size_of_constraint < max_constraints) { 3400 min_n = max_size_of_constraint; 3401 max_n = max_constraints; 3402 } 3403 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3404 #if defined(PETSC_USE_COMPLEX) 3405 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3406 #endif 3407 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3408 lwork = -1; 3409 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3410 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3411 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3412 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3413 #if !defined(PETSC_USE_COMPLEX) 3414 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3415 #else 3416 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3417 #endif 3418 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3419 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3420 #endif /* on missing GESVD */ 3421 /* Allocate optimal workspace */ 3422 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3423 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3424 } 3425 /* Now we can loop on constraining sets */ 3426 total_counts = 0; 3427 constraints_idxs_ptr[0] = 0; 3428 constraints_data_ptr[0] = 0; 3429 /* vertices */ 3430 if (n_vertices) { 3431 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3432 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3433 for (i=0;i<n_vertices;i++) { 3434 constraints_n[total_counts] = 1; 3435 constraints_data[total_counts] = 1.0; 3436 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3437 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3438 total_counts++; 3439 } 3440 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3441 n_vertices = total_counts; 3442 } 3443 3444 /* edges and faces */ 3445 total_counts_cc = total_counts; 3446 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3447 IS used_is; 3448 PetscBool idxs_copied = PETSC_FALSE; 3449 3450 if (ncc<n_ISForEdges) { 3451 used_is = ISForEdges[ncc]; 3452 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3453 } else { 3454 used_is = ISForFaces[ncc-n_ISForEdges]; 3455 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3456 } 3457 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3458 3459 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3460 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3461 /* change of basis should not be performed on local periodic nodes */ 3462 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3463 if (nnsp_has_cnst) { 3464 PetscScalar quad_value; 3465 3466 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3467 idxs_copied = PETSC_TRUE; 3468 3469 if (!pcbddc->use_nnsp_true) { 3470 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3471 } else { 3472 quad_value = 1.0; 3473 } 3474 for (j=0;j<size_of_constraint;j++) { 3475 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3476 } 3477 temp_constraints++; 3478 total_counts++; 3479 } 3480 for (k=0;k<nnsp_size;k++) { 3481 PetscReal real_value; 3482 PetscScalar *ptr_to_data; 3483 3484 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3485 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3486 for (j=0;j<size_of_constraint;j++) { 3487 ptr_to_data[j] = array[is_indices[j]]; 3488 } 3489 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3490 /* check if array is null on the connected component */ 3491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3492 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3493 if (real_value > 0.0) { /* keep indices and values */ 3494 temp_constraints++; 3495 total_counts++; 3496 if (!idxs_copied) { 3497 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3498 idxs_copied = PETSC_TRUE; 3499 } 3500 } 3501 } 3502 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3503 valid_constraints = temp_constraints; 3504 if (!pcbddc->use_nnsp_true && temp_constraints) { 3505 if (temp_constraints == 1) { /* just normalize the constraint */ 3506 PetscScalar norm,*ptr_to_data; 3507 3508 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3510 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3511 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3512 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3513 } else { /* perform SVD */ 3514 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3515 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3516 3517 #if defined(PETSC_MISSING_LAPACK_GESVD) 3518 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3519 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3520 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3521 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3522 from that computed using LAPACKgesvd 3523 -> This is due to a different computation of eigenvectors in LAPACKheev 3524 -> The quality of the POD-computed basis will be the same */ 3525 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3526 /* Store upper triangular part of correlation matrix */ 3527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3528 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3529 for (j=0;j<temp_constraints;j++) { 3530 for (k=0;k<j+1;k++) { 3531 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3532 } 3533 } 3534 /* compute eigenvalues and eigenvectors of correlation matrix */ 3535 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3536 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3537 #if !defined(PETSC_USE_COMPLEX) 3538 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3539 #else 3540 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3541 #endif 3542 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3543 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3544 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3545 j = 0; 3546 while (j < temp_constraints && singular_vals[j] < tol) j++; 3547 total_counts = total_counts-j; 3548 valid_constraints = temp_constraints-j; 3549 /* scale and copy POD basis into used quadrature memory */ 3550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3551 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3552 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3554 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3556 if (j<temp_constraints) { 3557 PetscInt ii; 3558 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3559 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3560 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3561 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3562 for (k=0;k<temp_constraints-j;k++) { 3563 for (ii=0;ii<size_of_constraint;ii++) { 3564 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3565 } 3566 } 3567 } 3568 #else /* on missing GESVD */ 3569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3570 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3571 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3573 #if !defined(PETSC_USE_COMPLEX) 3574 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3575 #else 3576 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3577 #endif 3578 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3579 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3580 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3581 k = temp_constraints; 3582 if (k > size_of_constraint) k = size_of_constraint; 3583 j = 0; 3584 while (j < k && singular_vals[k-j-1] < tol) j++; 3585 valid_constraints = k-j; 3586 total_counts = total_counts-temp_constraints+valid_constraints; 3587 #endif /* on missing GESVD */ 3588 } 3589 } 3590 /* update pointers information */ 3591 if (valid_constraints) { 3592 constraints_n[total_counts_cc] = valid_constraints; 3593 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3594 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3595 /* set change_of_basis flag */ 3596 if (boolforchange) { 3597 PetscBTSet(change_basis,total_counts_cc); 3598 } 3599 total_counts_cc++; 3600 } 3601 } 3602 /* free workspace */ 3603 if (!skip_lapack) { 3604 ierr = PetscFree(work);CHKERRQ(ierr); 3605 #if defined(PETSC_USE_COMPLEX) 3606 ierr = PetscFree(rwork);CHKERRQ(ierr); 3607 #endif 3608 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3609 #if defined(PETSC_MISSING_LAPACK_GESVD) 3610 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3611 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3612 #endif 3613 } 3614 for (k=0;k<nnsp_size;k++) { 3615 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3616 } 3617 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3618 /* free index sets of faces, edges and vertices */ 3619 for (i=0;i<n_ISForFaces;i++) { 3620 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3621 } 3622 if (n_ISForFaces) { 3623 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3624 } 3625 for (i=0;i<n_ISForEdges;i++) { 3626 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3627 } 3628 if (n_ISForEdges) { 3629 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3630 } 3631 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3632 } else { 3633 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3634 3635 total_counts = 0; 3636 n_vertices = 0; 3637 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3638 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3639 } 3640 max_constraints = 0; 3641 total_counts_cc = 0; 3642 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3643 total_counts += pcbddc->adaptive_constraints_n[i]; 3644 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3645 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3646 } 3647 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3648 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3649 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3650 constraints_data = pcbddc->adaptive_constraints_data; 3651 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3652 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3653 total_counts_cc = 0; 3654 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3655 if (pcbddc->adaptive_constraints_n[i]) { 3656 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3657 } 3658 } 3659 #if 0 3660 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3661 for (i=0;i<total_counts_cc;i++) { 3662 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3663 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3664 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3665 printf(" %d",constraints_idxs[j]); 3666 } 3667 printf("\n"); 3668 printf("number of cc: %d\n",constraints_n[i]); 3669 } 3670 for (i=0;i<n_vertices;i++) { 3671 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3672 } 3673 for (i=0;i<sub_schurs->n_subs;i++) { 3674 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3675 } 3676 #endif 3677 3678 max_size_of_constraint = 0; 3679 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3680 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3681 /* Change of basis */ 3682 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3683 if (pcbddc->use_change_of_basis) { 3684 for (i=0;i<sub_schurs->n_subs;i++) { 3685 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3686 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3687 } 3688 } 3689 } 3690 } 3691 pcbddc->local_primal_size = total_counts; 3692 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3693 3694 /* map constraints_idxs in boundary numbering */ 3695 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3696 if (i != constraints_idxs_ptr[total_counts_cc]) { 3697 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3698 } 3699 3700 /* Create constraint matrix */ 3701 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3702 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3703 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3704 3705 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3706 /* determine if a QR strategy is needed for change of basis */ 3707 qr_needed = PETSC_FALSE; 3708 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3709 total_primal_vertices=0; 3710 pcbddc->local_primal_size_cc = 0; 3711 for (i=0;i<total_counts_cc;i++) { 3712 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3713 if (size_of_constraint == 1) { 3714 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3715 pcbddc->local_primal_size_cc += 1; 3716 } else if (PetscBTLookup(change_basis,i)) { 3717 for (k=0;k<constraints_n[i];k++) { 3718 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3719 } 3720 pcbddc->local_primal_size_cc += constraints_n[i]; 3721 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3722 PetscBTSet(qr_needed_idx,i); 3723 qr_needed = PETSC_TRUE; 3724 } 3725 } else { 3726 pcbddc->local_primal_size_cc += 1; 3727 } 3728 } 3729 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3730 pcbddc->n_vertices = total_primal_vertices; 3731 /* permute indices in order to have a sorted set of vertices */ 3732 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3733 3734 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3735 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3736 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3737 3738 /* nonzero structure of constraint matrix */ 3739 /* and get reference dof for local constraints */ 3740 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3741 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3742 3743 j = total_primal_vertices; 3744 total_counts = total_primal_vertices; 3745 cum = total_primal_vertices; 3746 for (i=n_vertices;i<total_counts_cc;i++) { 3747 if (!PetscBTLookup(change_basis,i)) { 3748 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3749 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3750 cum++; 3751 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3752 for (k=0;k<constraints_n[i];k++) { 3753 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3754 nnz[j+k] = size_of_constraint; 3755 } 3756 j += constraints_n[i]; 3757 } 3758 } 3759 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3760 ierr = PetscFree(nnz);CHKERRQ(ierr); 3761 3762 /* set values in constraint matrix */ 3763 for (i=0;i<total_primal_vertices;i++) { 3764 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3765 } 3766 total_counts = total_primal_vertices; 3767 for (i=n_vertices;i<total_counts_cc;i++) { 3768 if (!PetscBTLookup(change_basis,i)) { 3769 PetscInt *cols; 3770 3771 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3772 cols = constraints_idxs+constraints_idxs_ptr[i]; 3773 for (k=0;k<constraints_n[i];k++) { 3774 PetscInt row = total_counts+k; 3775 PetscScalar *vals; 3776 3777 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3778 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3779 } 3780 total_counts += constraints_n[i]; 3781 } 3782 } 3783 /* assembling */ 3784 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3785 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3786 3787 /* 3788 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3789 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3790 */ 3791 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3792 if (pcbddc->use_change_of_basis) { 3793 /* dual and primal dofs on a single cc */ 3794 PetscInt dual_dofs,primal_dofs; 3795 /* working stuff for GEQRF */ 3796 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3797 PetscBLASInt lqr_work; 3798 /* working stuff for UNGQR */ 3799 PetscScalar *gqr_work,lgqr_work_t; 3800 PetscBLASInt lgqr_work; 3801 /* working stuff for TRTRS */ 3802 PetscScalar *trs_rhs; 3803 PetscBLASInt Blas_NRHS; 3804 /* pointers for values insertion into change of basis matrix */ 3805 PetscInt *start_rows,*start_cols; 3806 PetscScalar *start_vals; 3807 /* working stuff for values insertion */ 3808 PetscBT is_primal; 3809 PetscInt *aux_primal_numbering_B; 3810 /* matrix sizes */ 3811 PetscInt global_size,local_size; 3812 /* temporary change of basis */ 3813 Mat localChangeOfBasisMatrix; 3814 /* extra space for debugging */ 3815 PetscScalar *dbg_work; 3816 3817 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3818 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3819 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3820 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3821 /* nonzeros for local mat */ 3822 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3823 for (i=0;i<pcis->n;i++) nnz[i]=1; 3824 for (i=n_vertices;i<total_counts_cc;i++) { 3825 if (PetscBTLookup(change_basis,i)) { 3826 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3827 if (PetscBTLookup(qr_needed_idx,i)) { 3828 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3829 } else { 3830 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3831 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3832 } 3833 } 3834 } 3835 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3836 ierr = PetscFree(nnz);CHKERRQ(ierr); 3837 /* Set initial identity in the matrix */ 3838 for (i=0;i<pcis->n;i++) { 3839 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3840 } 3841 3842 if (pcbddc->dbg_flag) { 3843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3845 } 3846 3847 3848 /* Now we loop on the constraints which need a change of basis */ 3849 /* 3850 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3851 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3852 3853 Basic blocks of change of basis matrix T computed by 3854 3855 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3856 3857 | 1 0 ... 0 s_1/S | 3858 | 0 1 ... 0 s_2/S | 3859 | ... | 3860 | 0 ... 1 s_{n-1}/S | 3861 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3862 3863 with S = \sum_{i=1}^n s_i^2 3864 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3865 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3866 3867 - QR decomposition of constraints otherwise 3868 */ 3869 if (qr_needed) { 3870 /* space to store Q */ 3871 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3872 /* first we issue queries for optimal work */ 3873 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3874 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3875 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3876 lqr_work = -1; 3877 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3878 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3879 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3880 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3881 lgqr_work = -1; 3882 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3883 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3884 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3886 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3887 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3889 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3890 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3891 /* array to store scaling factors for reflectors */ 3892 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3893 /* array to store rhs and solution of triangular solver */ 3894 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3895 /* allocating workspace for check */ 3896 if (pcbddc->dbg_flag) { 3897 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3898 } 3899 } 3900 /* array to store whether a node is primal or not */ 3901 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3902 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3903 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3904 if (i != total_primal_vertices) { 3905 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3906 } 3907 for (i=0;i<total_primal_vertices;i++) { 3908 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3909 } 3910 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3911 3912 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3913 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3914 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3915 if (PetscBTLookup(change_basis,total_counts)) { 3916 /* get constraint info */ 3917 primal_dofs = constraints_n[total_counts]; 3918 dual_dofs = size_of_constraint-primal_dofs; 3919 3920 if (pcbddc->dbg_flag) { 3921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3922 } 3923 3924 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3925 3926 /* copy quadrature constraints for change of basis check */ 3927 if (pcbddc->dbg_flag) { 3928 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3929 } 3930 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3931 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3932 3933 /* compute QR decomposition of constraints */ 3934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3935 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3937 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3938 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3939 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3940 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3941 3942 /* explictly compute R^-T */ 3943 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3944 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3945 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3948 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3949 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3950 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3951 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3953 3954 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3959 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3960 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3961 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3963 3964 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3965 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3966 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3968 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3969 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3973 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3974 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3975 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3976 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3977 3978 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3979 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3980 /* insert cols for primal dofs */ 3981 for (j=0;j<primal_dofs;j++) { 3982 start_vals = &qr_basis[j*size_of_constraint]; 3983 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3984 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3985 } 3986 /* insert cols for dual dofs */ 3987 for (j=0,k=0;j<dual_dofs;k++) { 3988 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3989 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3990 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3991 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3992 j++; 3993 } 3994 } 3995 3996 /* check change of basis */ 3997 if (pcbddc->dbg_flag) { 3998 PetscInt ii,jj; 3999 PetscBool valid_qr=PETSC_TRUE; 4000 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4006 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4007 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4008 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4009 for (jj=0;jj<size_of_constraint;jj++) { 4010 for (ii=0;ii<primal_dofs;ii++) { 4011 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4012 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4013 } 4014 } 4015 if (!valid_qr) { 4016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4017 for (jj=0;jj<size_of_constraint;jj++) { 4018 for (ii=0;ii<primal_dofs;ii++) { 4019 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4020 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4021 } 4022 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4023 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4024 } 4025 } 4026 } 4027 } else { 4028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4029 } 4030 } 4031 } else { /* simple transformation block */ 4032 PetscInt row,col; 4033 PetscScalar val,norm; 4034 4035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4036 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4037 for (j=0;j<size_of_constraint;j++) { 4038 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4039 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4040 if (!PetscBTLookup(is_primal,row_B)) { 4041 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4042 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4043 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4044 } else { 4045 for (k=0;k<size_of_constraint;k++) { 4046 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4047 if (row != col) { 4048 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4049 } else { 4050 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4051 } 4052 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4053 } 4054 } 4055 } 4056 if (pcbddc->dbg_flag) { 4057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4058 } 4059 } 4060 } else { 4061 if (pcbddc->dbg_flag) { 4062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4063 } 4064 } 4065 } 4066 4067 /* free workspace */ 4068 if (qr_needed) { 4069 if (pcbddc->dbg_flag) { 4070 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4071 } 4072 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4073 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4074 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4075 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4076 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4077 } 4078 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4079 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4080 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4081 4082 /* assembling of global change of variable */ 4083 if (!pcbddc->fake_change) { 4084 Mat tmat; 4085 PetscInt bs; 4086 4087 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4088 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4089 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4090 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4091 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4092 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4093 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4094 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4095 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4096 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4097 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4098 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4099 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4100 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4101 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4102 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4103 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4104 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4105 4106 /* check */ 4107 if (pcbddc->dbg_flag) { 4108 PetscReal error; 4109 Vec x,x_change; 4110 4111 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4112 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4113 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4114 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4115 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4116 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4117 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4118 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4119 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4120 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4121 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4122 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4124 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4125 ierr = VecDestroy(&x);CHKERRQ(ierr); 4126 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4127 } 4128 } 4129 4130 /* adapt sub_schurs computed (if any) */ 4131 if (pcbddc->use_deluxe_scaling) { 4132 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4133 if (sub_schurs->S_Ej_all) { 4134 Mat S_new,tmat; 4135 ISLocalToGlobalMapping NtoSall; 4136 IS is_all_N,is_V,is_V_Sall; 4137 const PetscScalar *array; 4138 const PetscInt *idxs_V,*idxs_all; 4139 PetscInt i,n_V; 4140 4141 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4142 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4143 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4144 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4145 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4146 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4147 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4148 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4149 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4150 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4151 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4152 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4153 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4154 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4155 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4156 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4157 for (i=0;i<n_V;i++) { 4158 PetscScalar val; 4159 PetscInt idx; 4160 4161 idx = idxs_V[i]; 4162 val = array[idxs_all[idxs_V[i]]]; 4163 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4164 } 4165 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4166 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4167 sub_schurs->S_Ej_all = S_new; 4168 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4169 if (sub_schurs->sum_S_Ej_all) { 4170 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4171 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4172 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4173 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4174 sub_schurs->sum_S_Ej_all = S_new; 4175 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4176 } 4177 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4178 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4179 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4180 if (!pcbddc->fake_change) { 4181 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4182 } else { 4183 if (!sub_schurs->change_primal) { 4184 sub_schurs->change_primal = is_V_Sall; 4185 } else { 4186 #if defined(PETSC_USE_DEBUG) 4187 IS ist; 4188 PetscInt n; 4189 4190 ierr = ISDifference(sub_schurs->change_primal,is_V_Sall,&ist);CHKERRQ(ierr); 4191 ierr = ISGetLocalSize(ist,&n);CHKERRQ(ierr); 4192 if (n) { 4193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4194 } 4195 ierr = ISDestroy(&ist);CHKERRQ(ierr); 4196 #endif 4197 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4198 } 4199 if (!sub_schurs->change) { 4200 ierr = KSPCreate(PETSC_COMM_SELF,&sub_schurs->change);CHKERRQ(ierr); 4201 ierr = KSPSetOperators(sub_schurs->change,tmat,tmat);CHKERRQ(ierr); 4202 #if defined(PETSC_USE_DEBUG) 4203 } else { 4204 Mat At; 4205 PetscReal norm; 4206 4207 ierr = KSPGetOperators(sub_schurs->change,&At,NULL);CHKERRQ(ierr); 4208 ierr = MatAXPY(tmat,-1.,At,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4209 ierr = MatNorm(tmat,NORM_INFINITY,&norm);CHKERRQ(ierr); 4210 if (norm > PETSC_SMALL) { 4211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 4212 } 4213 #endif 4214 } 4215 } 4216 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4217 } 4218 } 4219 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4220 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4221 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4222 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4223 } 4224 4225 /* set up change of basis context */ 4226 if (pcbddc->ChangeOfBasisMatrix) { 4227 PCBDDCChange_ctx change_ctx; 4228 4229 if (!pcbddc->new_global_mat) { 4230 PetscInt global_size,local_size; 4231 4232 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4233 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4234 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4235 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4236 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4237 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4238 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4239 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4240 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4241 } else { 4242 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4243 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4244 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4245 } 4246 if (!pcbddc->user_ChangeOfBasisMatrix) { 4247 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4248 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4249 } else { 4250 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4251 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4252 } 4253 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4254 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4255 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4256 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4257 } else { 4258 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4259 } 4260 4261 /* add pressure dofs to set of primal nodes for numbering purposes */ 4262 for (i=0;i<pcbddc->benign_n;i++) { 4263 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4264 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4265 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4266 pcbddc->local_primal_size_cc++; 4267 pcbddc->local_primal_size++; 4268 } 4269 4270 /* check if a new primal space has been introduced (also take into account benign trick) */ 4271 pcbddc->new_primal_space_local = PETSC_TRUE; 4272 if (olocal_primal_size == pcbddc->local_primal_size) { 4273 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4274 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4275 if (!pcbddc->new_primal_space_local) { 4276 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4277 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4278 } 4279 } 4280 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4281 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4282 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4283 4284 /* flush dbg viewer */ 4285 if (pcbddc->dbg_flag) { 4286 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4287 } 4288 4289 /* free workspace */ 4290 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4291 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4292 if (!pcbddc->adaptive_selection) { 4293 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4294 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4295 } else { 4296 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4297 pcbddc->adaptive_constraints_idxs_ptr, 4298 pcbddc->adaptive_constraints_data_ptr, 4299 pcbddc->adaptive_constraints_idxs, 4300 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4301 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4302 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4303 } 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4309 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4310 { 4311 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4312 PC_IS *pcis = (PC_IS*)pc->data; 4313 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4314 PetscInt ierr,i,vertex_size,N; 4315 PetscViewer viewer=pcbddc->dbg_viewer; 4316 4317 PetscFunctionBegin; 4318 /* Reset previously computed graph */ 4319 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4320 /* Init local Graph struct */ 4321 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4322 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4323 4324 /* Check validity of the csr graph passed in by the user */ 4325 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4326 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4327 } 4328 4329 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4330 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4331 PetscInt *xadj,*adjncy; 4332 PetscInt nvtxs; 4333 PetscBool flg_row=PETSC_FALSE; 4334 4335 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4336 if (flg_row) { 4337 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4338 pcbddc->computed_rowadj = PETSC_TRUE; 4339 } 4340 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4341 } 4342 if (pcbddc->dbg_flag) { 4343 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4344 } 4345 4346 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4347 vertex_size = 1; 4348 if (pcbddc->user_provided_isfordofs) { 4349 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4350 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4351 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4352 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4353 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4354 } 4355 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4356 pcbddc->n_ISForDofs = 0; 4357 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4358 } 4359 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 4360 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4361 } else { 4362 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4363 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4364 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4365 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4366 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4367 } 4368 } 4369 } 4370 4371 /* Setup of Graph */ 4372 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4373 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4374 } 4375 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4376 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4377 } 4378 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4379 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4380 } 4381 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4382 4383 /* attach info on disconnected subdomains if present */ 4384 if (pcbddc->n_local_subs) { 4385 PetscInt *local_subs; 4386 4387 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4388 for (i=0;i<pcbddc->n_local_subs;i++) { 4389 const PetscInt *idxs; 4390 PetscInt nl,j; 4391 4392 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4393 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4394 for (j=0;j<nl;j++) { 4395 local_subs[idxs[j]] = i; 4396 } 4397 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4398 } 4399 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4400 pcbddc->mat_graph->local_subs = local_subs; 4401 } 4402 4403 /* Graph's connected components analysis */ 4404 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4405 4406 /* print some info to stdout */ 4407 if (pcbddc->dbg_flag) { 4408 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4409 } 4410 4411 /* mark topography has done */ 4412 pcbddc->recompute_topography = PETSC_FALSE; 4413 PetscFunctionReturn(0); 4414 } 4415 4416 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "PCBDDCSubsetNumbering" 4419 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4420 { 4421 PetscSF sf; 4422 PetscLayout map; 4423 const PetscInt *idxs; 4424 PetscInt *leaf_data,*root_data,*gidxs; 4425 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4426 PetscInt n_n,nlocals,start,first_index; 4427 PetscMPIInt commsize; 4428 PetscBool first_found; 4429 PetscErrorCode ierr; 4430 4431 PetscFunctionBegin; 4432 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4433 if (subset_mult) { 4434 PetscCheckSameComm(subset,1,subset_mult,2); 4435 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4436 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4437 } 4438 /* create workspace layout for computing global indices of subset */ 4439 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4440 lbounds[0] = lbounds[1] = 0; 4441 for (i=0;i<n;i++) { 4442 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4443 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4444 } 4445 lbounds[0] = -lbounds[0]; 4446 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4447 gbounds[0] = -gbounds[0]; 4448 N = gbounds[1] - gbounds[0] + 1; 4449 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4450 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4451 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4452 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4453 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4454 4455 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4456 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4457 if (subset_mult) { 4458 const PetscInt* idxs_mult; 4459 4460 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4461 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4462 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4463 } else { 4464 for (i=0;i<n;i++) leaf_data[i] = 1; 4465 } 4466 /* local size of new subset */ 4467 n_n = 0; 4468 for (i=0;i<n;i++) n_n += leaf_data[i]; 4469 4470 /* global indexes in layout */ 4471 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4472 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4473 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4474 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4475 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4476 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4477 4478 /* reduce from leaves to roots */ 4479 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4480 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4481 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4482 4483 /* count indexes in local part of layout */ 4484 nlocals = 0; 4485 first_index = -1; 4486 first_found = PETSC_FALSE; 4487 for (i=0;i<Nl;i++) { 4488 if (!first_found && root_data[i]) { 4489 first_found = PETSC_TRUE; 4490 first_index = i; 4491 } 4492 nlocals += root_data[i]; 4493 } 4494 4495 /* cumulative of number of indexes and size of subset without holes */ 4496 #if defined(PETSC_HAVE_MPI_EXSCAN) 4497 start = 0; 4498 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4499 #else 4500 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4501 start = start-nlocals; 4502 #endif 4503 4504 if (N_n) { /* compute total size of new subset if requested */ 4505 *N_n = start + nlocals; 4506 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4507 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4508 } 4509 4510 /* adapt root data with cumulative */ 4511 if (first_found) { 4512 PetscInt old_index; 4513 4514 root_data[first_index] += start; 4515 old_index = first_index; 4516 for (i=first_index+1;i<Nl;i++) { 4517 if (root_data[i]) { 4518 root_data[i] += root_data[old_index]; 4519 old_index = i; 4520 } 4521 } 4522 } 4523 4524 /* from roots to leaves */ 4525 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4526 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4527 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4528 4529 /* create new IS with global indexes without holes */ 4530 if (subset_mult) { 4531 const PetscInt* idxs_mult; 4532 PetscInt cum; 4533 4534 cum = 0; 4535 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4536 for (i=0;i<n;i++) { 4537 PetscInt j; 4538 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4539 } 4540 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4541 } else { 4542 for (i=0;i<n;i++) { 4543 gidxs[i] = leaf_data[i]-1; 4544 } 4545 } 4546 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4547 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4548 PetscFunctionReturn(0); 4549 } 4550 4551 #undef __FUNCT__ 4552 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4553 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4554 { 4555 PetscInt i,j; 4556 PetscScalar *alphas; 4557 PetscErrorCode ierr; 4558 4559 PetscFunctionBegin; 4560 /* this implements stabilized Gram-Schmidt */ 4561 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4562 for (i=0;i<n;i++) { 4563 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4564 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4565 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4566 } 4567 ierr = PetscFree(alphas);CHKERRQ(ierr); 4568 PetscFunctionReturn(0); 4569 } 4570 4571 #undef __FUNCT__ 4572 #define __FUNCT__ "MatISGetSubassemblingPattern" 4573 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4574 { 4575 IS ranks_send_to; 4576 PetscInt n_neighs,*neighs,*n_shared,**shared; 4577 PetscMPIInt size,rank,color; 4578 PetscInt *xadj,*adjncy; 4579 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4580 PetscInt i,local_size,threshold=0; 4581 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4582 PetscSubcomm subcomm; 4583 PetscErrorCode ierr; 4584 4585 PetscFunctionBegin; 4586 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4587 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4588 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4589 4590 /* Get info on mapping */ 4591 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4592 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4593 4594 /* build local CSR graph of subdomains' connectivity */ 4595 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4596 xadj[0] = 0; 4597 xadj[1] = PetscMax(n_neighs-1,0); 4598 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4599 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4600 4601 if (threshold) { 4602 PetscInt xadj_count = 0; 4603 for (i=1;i<n_neighs;i++) { 4604 if (n_shared[i] > threshold) { 4605 adjncy[xadj_count] = neighs[i]; 4606 adjncy_wgt[xadj_count] = n_shared[i]; 4607 xadj_count++; 4608 } 4609 } 4610 xadj[1] = xadj_count; 4611 } else { 4612 if (xadj[1]) { 4613 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4614 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4615 } 4616 } 4617 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4618 if (use_square) { 4619 for (i=0;i<xadj[1];i++) { 4620 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4621 } 4622 } 4623 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4624 4625 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4626 4627 /* 4628 Restrict work on active processes only. 4629 */ 4630 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4631 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4632 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4633 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4634 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4635 if (color) { 4636 ierr = PetscFree(xadj);CHKERRQ(ierr); 4637 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4638 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4639 } else { 4640 Mat subdomain_adj; 4641 IS new_ranks,new_ranks_contig; 4642 MatPartitioning partitioner; 4643 PetscInt prank,rstart=0,rend=0; 4644 PetscInt *is_indices,*oldranks; 4645 PetscBool aggregate; 4646 4647 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4649 prank = rank; 4650 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4651 /* 4652 for (i=0;i<size;i++) { 4653 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4654 } 4655 */ 4656 for (i=0;i<xadj[1];i++) { 4657 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4658 } 4659 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4660 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4661 if (aggregate) { 4662 PetscInt lrows,row,ncols,*cols; 4663 PetscMPIInt nrank; 4664 PetscScalar *vals; 4665 4666 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4667 lrows = 0; 4668 if (nrank<redprocs) { 4669 lrows = size/redprocs; 4670 if (nrank<size%redprocs) lrows++; 4671 } 4672 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4673 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4674 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4675 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4676 row = nrank; 4677 ncols = xadj[1]-xadj[0]; 4678 cols = adjncy; 4679 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4680 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4681 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4682 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4683 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4684 ierr = PetscFree(xadj);CHKERRQ(ierr); 4685 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4686 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4687 ierr = PetscFree(vals);CHKERRQ(ierr); 4688 } else { 4689 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4690 } 4691 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4692 4693 /* Partition */ 4694 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4695 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4696 if (use_vwgt) { 4697 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4698 v_wgt[0] = local_size; 4699 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4700 } 4701 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4702 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4703 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4704 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4705 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4706 4707 /* renumber new_ranks to avoid "holes" in new set of processors */ 4708 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4709 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4710 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4711 if (!redprocs) { 4712 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4713 } else { 4714 PetscInt idxs[1]; 4715 PetscMPIInt tag; 4716 MPI_Request *reqs; 4717 4718 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4719 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4720 for (i=rstart;i<rend;i++) { 4721 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4722 } 4723 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4724 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4725 ierr = PetscFree(reqs);CHKERRQ(ierr); 4726 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4727 } 4728 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4729 /* clean up */ 4730 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4731 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4732 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4733 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4734 } 4735 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4736 4737 /* assemble parallel IS for sends */ 4738 i = 1; 4739 if (color) i=0; 4740 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4741 /* get back IS */ 4742 *is_sends = ranks_send_to; 4743 PetscFunctionReturn(0); 4744 } 4745 4746 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4747 4748 #undef __FUNCT__ 4749 #define __FUNCT__ "MatISSubassemble" 4750 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4751 { 4752 Mat local_mat; 4753 IS is_sends_internal; 4754 PetscInt rows,cols,new_local_rows; 4755 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4756 PetscBool ismatis,isdense,newisdense,destroy_mat; 4757 ISLocalToGlobalMapping l2gmap; 4758 PetscInt* l2gmap_indices; 4759 const PetscInt* is_indices; 4760 MatType new_local_type; 4761 /* buffers */ 4762 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4763 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4764 PetscInt *recv_buffer_idxs_local; 4765 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4766 /* MPI */ 4767 MPI_Comm comm,comm_n; 4768 PetscSubcomm subcomm; 4769 PetscMPIInt n_sends,n_recvs,commsize; 4770 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4771 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4772 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4773 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4774 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4775 PetscErrorCode ierr; 4776 4777 PetscFunctionBegin; 4778 /* TODO: add missing checks */ 4779 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4780 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4781 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4782 PetscValidLogicalCollectiveInt(mat,nis,7); 4783 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4784 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4785 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4786 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4787 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4788 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4789 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4790 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4791 PetscInt mrows,mcols,mnrows,mncols; 4792 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4793 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4794 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4795 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4796 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4797 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4798 } 4799 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4800 PetscValidLogicalCollectiveInt(mat,bs,0); 4801 /* prepare IS for sending if not provided */ 4802 if (!is_sends) { 4803 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4804 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4805 } else { 4806 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4807 is_sends_internal = is_sends; 4808 } 4809 4810 /* get comm */ 4811 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4812 4813 /* compute number of sends */ 4814 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4815 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4816 4817 /* compute number of receives */ 4818 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4819 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4820 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4821 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4822 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4823 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4824 ierr = PetscFree(iflags);CHKERRQ(ierr); 4825 4826 /* restrict comm if requested */ 4827 subcomm = 0; 4828 destroy_mat = PETSC_FALSE; 4829 if (restrict_comm) { 4830 PetscMPIInt color,subcommsize; 4831 4832 color = 0; 4833 if (restrict_full) { 4834 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4835 } else { 4836 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4837 } 4838 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4839 subcommsize = commsize - subcommsize; 4840 /* check if reuse has been requested */ 4841 if (reuse == MAT_REUSE_MATRIX) { 4842 if (*mat_n) { 4843 PetscMPIInt subcommsize2; 4844 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4845 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4846 comm_n = PetscObjectComm((PetscObject)*mat_n); 4847 } else { 4848 comm_n = PETSC_COMM_SELF; 4849 } 4850 } else { /* MAT_INITIAL_MATRIX */ 4851 PetscMPIInt rank; 4852 4853 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4854 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4855 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4856 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4857 comm_n = PetscSubcommChild(subcomm); 4858 } 4859 /* flag to destroy *mat_n if not significative */ 4860 if (color) destroy_mat = PETSC_TRUE; 4861 } else { 4862 comm_n = comm; 4863 } 4864 4865 /* prepare send/receive buffers */ 4866 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4867 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4868 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4869 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4870 if (nis) { 4871 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4872 } 4873 4874 /* Get data from local matrices */ 4875 if (!isdense) { 4876 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4877 /* TODO: See below some guidelines on how to prepare the local buffers */ 4878 /* 4879 send_buffer_vals should contain the raw values of the local matrix 4880 send_buffer_idxs should contain: 4881 - MatType_PRIVATE type 4882 - PetscInt size_of_l2gmap 4883 - PetscInt global_row_indices[size_of_l2gmap] 4884 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4885 */ 4886 } else { 4887 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4888 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4890 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4891 send_buffer_idxs[1] = i; 4892 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4893 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4894 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4895 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4896 for (i=0;i<n_sends;i++) { 4897 ilengths_vals[is_indices[i]] = len*len; 4898 ilengths_idxs[is_indices[i]] = len+2; 4899 } 4900 } 4901 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4902 /* additional is (if any) */ 4903 if (nis) { 4904 PetscMPIInt psum; 4905 PetscInt j; 4906 for (j=0,psum=0;j<nis;j++) { 4907 PetscInt plen; 4908 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4909 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4910 psum += len+1; /* indices + lenght */ 4911 } 4912 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4913 for (j=0,psum=0;j<nis;j++) { 4914 PetscInt plen; 4915 const PetscInt *is_array_idxs; 4916 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4917 send_buffer_idxs_is[psum] = plen; 4918 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4919 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4920 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4921 psum += plen+1; /* indices + lenght */ 4922 } 4923 for (i=0;i<n_sends;i++) { 4924 ilengths_idxs_is[is_indices[i]] = psum; 4925 } 4926 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4927 } 4928 4929 buf_size_idxs = 0; 4930 buf_size_vals = 0; 4931 buf_size_idxs_is = 0; 4932 for (i=0;i<n_recvs;i++) { 4933 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4934 buf_size_vals += (PetscInt)olengths_vals[i]; 4935 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4936 } 4937 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4938 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4939 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4940 4941 /* get new tags for clean communications */ 4942 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4943 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4944 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4945 4946 /* allocate for requests */ 4947 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4948 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4949 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4950 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4951 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4952 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4953 4954 /* communications */ 4955 ptr_idxs = recv_buffer_idxs; 4956 ptr_vals = recv_buffer_vals; 4957 ptr_idxs_is = recv_buffer_idxs_is; 4958 for (i=0;i<n_recvs;i++) { 4959 source_dest = onodes[i]; 4960 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4961 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4962 ptr_idxs += olengths_idxs[i]; 4963 ptr_vals += olengths_vals[i]; 4964 if (nis) { 4965 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4966 ptr_idxs_is += olengths_idxs_is[i]; 4967 } 4968 } 4969 for (i=0;i<n_sends;i++) { 4970 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4971 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4972 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4973 if (nis) { 4974 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4975 } 4976 } 4977 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4978 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4979 4980 /* assemble new l2g map */ 4981 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4982 ptr_idxs = recv_buffer_idxs; 4983 new_local_rows = 0; 4984 for (i=0;i<n_recvs;i++) { 4985 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4986 ptr_idxs += olengths_idxs[i]; 4987 } 4988 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4989 ptr_idxs = recv_buffer_idxs; 4990 new_local_rows = 0; 4991 for (i=0;i<n_recvs;i++) { 4992 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4993 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4994 ptr_idxs += olengths_idxs[i]; 4995 } 4996 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4997 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4998 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4999 5000 /* infer new local matrix type from received local matrices type */ 5001 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5002 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5003 if (n_recvs) { 5004 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5005 ptr_idxs = recv_buffer_idxs; 5006 for (i=0;i<n_recvs;i++) { 5007 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5008 new_local_type_private = MATAIJ_PRIVATE; 5009 break; 5010 } 5011 ptr_idxs += olengths_idxs[i]; 5012 } 5013 switch (new_local_type_private) { 5014 case MATDENSE_PRIVATE: 5015 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5016 new_local_type = MATSEQAIJ; 5017 bs = 1; 5018 } else { /* if I receive only 1 dense matrix */ 5019 new_local_type = MATSEQDENSE; 5020 bs = 1; 5021 } 5022 break; 5023 case MATAIJ_PRIVATE: 5024 new_local_type = MATSEQAIJ; 5025 bs = 1; 5026 break; 5027 case MATBAIJ_PRIVATE: 5028 new_local_type = MATSEQBAIJ; 5029 break; 5030 case MATSBAIJ_PRIVATE: 5031 new_local_type = MATSEQSBAIJ; 5032 break; 5033 default: 5034 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5035 break; 5036 } 5037 } else { /* by default, new_local_type is seqdense */ 5038 new_local_type = MATSEQDENSE; 5039 bs = 1; 5040 } 5041 5042 /* create MATIS object if needed */ 5043 if (reuse == MAT_INITIAL_MATRIX) { 5044 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5045 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5046 } else { 5047 /* it also destroys the local matrices */ 5048 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5049 } 5050 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5051 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5052 5053 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5054 5055 /* Global to local map of received indices */ 5056 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5057 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5058 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5059 5060 /* restore attributes -> type of incoming data and its size */ 5061 buf_size_idxs = 0; 5062 for (i=0;i<n_recvs;i++) { 5063 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5064 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5065 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5066 } 5067 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5068 5069 /* set preallocation */ 5070 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5071 if (!newisdense) { 5072 PetscInt *new_local_nnz=0; 5073 5074 ptr_vals = recv_buffer_vals; 5075 ptr_idxs = recv_buffer_idxs_local; 5076 if (n_recvs) { 5077 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5078 } 5079 for (i=0;i<n_recvs;i++) { 5080 PetscInt j; 5081 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5082 for (j=0;j<*(ptr_idxs+1);j++) { 5083 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5084 } 5085 } else { 5086 /* TODO */ 5087 } 5088 ptr_idxs += olengths_idxs[i]; 5089 } 5090 if (new_local_nnz) { 5091 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5092 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5093 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5094 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5095 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5096 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5097 } else { 5098 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5099 } 5100 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5101 } else { 5102 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5103 } 5104 5105 /* set values */ 5106 ptr_vals = recv_buffer_vals; 5107 ptr_idxs = recv_buffer_idxs_local; 5108 for (i=0;i<n_recvs;i++) { 5109 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5110 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5111 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5112 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5113 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5114 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5115 } else { 5116 /* TODO */ 5117 } 5118 ptr_idxs += olengths_idxs[i]; 5119 ptr_vals += olengths_vals[i]; 5120 } 5121 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5122 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5123 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5124 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5125 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5126 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5127 5128 #if 0 5129 if (!restrict_comm) { /* check */ 5130 Vec lvec,rvec; 5131 PetscReal infty_error; 5132 5133 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5134 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5135 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5136 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5137 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5138 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5139 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5140 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5141 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5142 } 5143 #endif 5144 5145 /* assemble new additional is (if any) */ 5146 if (nis) { 5147 PetscInt **temp_idxs,*count_is,j,psum; 5148 5149 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5150 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5151 ptr_idxs = recv_buffer_idxs_is; 5152 psum = 0; 5153 for (i=0;i<n_recvs;i++) { 5154 for (j=0;j<nis;j++) { 5155 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5156 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5157 psum += plen; 5158 ptr_idxs += plen+1; /* shift pointer to received data */ 5159 } 5160 } 5161 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5162 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5163 for (i=1;i<nis;i++) { 5164 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5165 } 5166 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5167 ptr_idxs = recv_buffer_idxs_is; 5168 for (i=0;i<n_recvs;i++) { 5169 for (j=0;j<nis;j++) { 5170 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5171 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5172 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5173 ptr_idxs += plen+1; /* shift pointer to received data */ 5174 } 5175 } 5176 for (i=0;i<nis;i++) { 5177 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5178 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5179 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5180 } 5181 ierr = PetscFree(count_is);CHKERRQ(ierr); 5182 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5183 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5184 } 5185 /* free workspace */ 5186 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5187 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5188 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5189 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5190 if (isdense) { 5191 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5192 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5193 } else { 5194 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5195 } 5196 if (nis) { 5197 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5198 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5199 } 5200 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5201 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5202 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5203 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5204 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5205 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5206 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5207 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5208 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5209 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5210 ierr = PetscFree(onodes);CHKERRQ(ierr); 5211 if (nis) { 5212 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5213 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5214 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5215 } 5216 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5217 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5218 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5219 for (i=0;i<nis;i++) { 5220 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5221 } 5222 *mat_n = NULL; 5223 } 5224 PetscFunctionReturn(0); 5225 } 5226 5227 /* temporary hack into ksp private data structure */ 5228 #include <petsc/private/kspimpl.h> 5229 5230 #undef __FUNCT__ 5231 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5232 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5233 { 5234 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5235 PC_IS *pcis = (PC_IS*)pc->data; 5236 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5237 MatNullSpace CoarseNullSpace=NULL; 5238 ISLocalToGlobalMapping coarse_islg; 5239 IS coarse_is,*isarray; 5240 PetscInt i,im_active=-1,active_procs=-1; 5241 PetscInt nis,nisdofs,nisneu,nisvert; 5242 PC pc_temp; 5243 PCType coarse_pc_type; 5244 KSPType coarse_ksp_type; 5245 PetscBool multilevel_requested,multilevel_allowed; 5246 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5247 Mat t_coarse_mat_is; 5248 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5249 PetscMPIInt all_procs; 5250 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5251 PetscBool compute_vecs = PETSC_FALSE; 5252 PetscScalar *array; 5253 PetscErrorCode ierr; 5254 5255 PetscFunctionBegin; 5256 /* Assign global numbering to coarse dofs */ 5257 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5258 PetscInt ocoarse_size; 5259 compute_vecs = PETSC_TRUE; 5260 ocoarse_size = pcbddc->coarse_size; 5261 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5262 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5263 /* see if we can avoid some work */ 5264 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5265 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5266 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5267 PC pc; 5268 PetscBool isbddc; 5269 5270 /* temporary workaround since PCBDDC does not have a reset method so far */ 5271 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5272 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5273 if (isbddc) { 5274 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5275 } else { 5276 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5277 } 5278 coarse_reuse = PETSC_FALSE; 5279 } else { /* we can safely reuse already computed coarse matrix */ 5280 coarse_reuse = PETSC_TRUE; 5281 } 5282 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5283 coarse_reuse = PETSC_FALSE; 5284 } 5285 /* reset any subassembling information */ 5286 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5287 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5288 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5289 coarse_reuse = PETSC_TRUE; 5290 } 5291 5292 /* count "active" (i.e. with positive local size) and "void" processes */ 5293 im_active = !!(pcis->n); 5294 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5295 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5296 void_procs = all_procs-active_procs; 5297 csin_type_simple = PETSC_TRUE; 5298 redist = PETSC_FALSE; 5299 if (pcbddc->current_level && void_procs) { 5300 csin_ml = PETSC_TRUE; 5301 ncoarse_ml = void_procs; 5302 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5303 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5304 csin_ds = PETSC_TRUE; 5305 ncoarse_ds = pcbddc->redistribute_coarse; 5306 redist = PETSC_TRUE; 5307 } else { 5308 csin_ds = PETSC_TRUE; 5309 ncoarse_ds = active_procs; 5310 redist = PETSC_TRUE; 5311 } 5312 } else { 5313 csin_ml = PETSC_FALSE; 5314 ncoarse_ml = all_procs; 5315 if (void_procs) { 5316 csin_ds = PETSC_TRUE; 5317 ncoarse_ds = void_procs; 5318 csin_type_simple = PETSC_FALSE; 5319 } else { 5320 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5321 csin_ds = PETSC_TRUE; 5322 ncoarse_ds = pcbddc->redistribute_coarse; 5323 redist = PETSC_TRUE; 5324 } else { 5325 csin_ds = PETSC_FALSE; 5326 ncoarse_ds = all_procs; 5327 } 5328 } 5329 } 5330 5331 /* 5332 test if we can go multilevel: three conditions must be satisfied: 5333 - we have not exceeded the number of levels requested 5334 - we can actually subassemble the active processes 5335 - we can find a suitable number of MPI processes where we can place the subassembled problem 5336 */ 5337 multilevel_allowed = PETSC_FALSE; 5338 multilevel_requested = PETSC_FALSE; 5339 if (pcbddc->current_level < pcbddc->max_levels) { 5340 multilevel_requested = PETSC_TRUE; 5341 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5342 multilevel_allowed = PETSC_FALSE; 5343 } else { 5344 multilevel_allowed = PETSC_TRUE; 5345 } 5346 } 5347 /* determine number of process partecipating to coarse solver */ 5348 if (multilevel_allowed) { 5349 ncoarse = ncoarse_ml; 5350 csin = csin_ml; 5351 redist = PETSC_FALSE; 5352 } else { 5353 ncoarse = ncoarse_ds; 5354 csin = csin_ds; 5355 } 5356 5357 /* creates temporary l2gmap and IS for coarse indexes */ 5358 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5359 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5360 5361 /* creates temporary MATIS object for coarse matrix */ 5362 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5363 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5364 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5365 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5366 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5367 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5368 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5369 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5370 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5371 5372 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5373 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5374 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5375 const PetscInt *idxs; 5376 ISLocalToGlobalMapping tmap; 5377 5378 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5379 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5380 /* allocate space for temporary storage */ 5381 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5382 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5383 /* allocate for IS array */ 5384 nisdofs = pcbddc->n_ISForDofsLocal; 5385 nisneu = !!pcbddc->NeumannBoundariesLocal; 5386 nisvert = 0; 5387 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5388 nisvert = 1; 5389 } 5390 nis = nisdofs + nisneu + nisvert; 5391 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5392 /* dofs splitting */ 5393 for (i=0;i<nisdofs;i++) { 5394 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5395 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5396 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5397 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5398 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5399 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5400 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5401 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5402 } 5403 /* neumann boundaries */ 5404 if (pcbddc->NeumannBoundariesLocal) { 5405 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5406 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5407 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5408 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5409 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5410 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5411 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5412 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5413 } 5414 /* primal vertices (benign) */ 5415 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5416 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5417 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5418 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5419 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5420 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5421 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5422 } 5423 /* free memory */ 5424 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5425 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5426 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5427 } else { 5428 nis = 0; 5429 nisdofs = 0; 5430 nisneu = 0; 5431 nisvert = 0; 5432 isarray = NULL; 5433 } 5434 /* destroy no longer needed map */ 5435 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5436 5437 /* restrict on coarse candidates (if needed) */ 5438 coarse_mat_is = NULL; 5439 if (csin) { 5440 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5441 if (redist) { 5442 PetscMPIInt rank; 5443 PetscInt spc,n_spc_p1,dest[1],destsize; 5444 5445 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5446 spc = active_procs/ncoarse; 5447 n_spc_p1 = active_procs%ncoarse; 5448 if (im_active) { 5449 destsize = 1; 5450 if (rank > n_spc_p1*(spc+1)-1) { 5451 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5452 } else { 5453 dest[0] = rank/(spc+1); 5454 } 5455 } else { 5456 destsize = 0; 5457 } 5458 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5459 } else if (csin_type_simple) { 5460 PetscMPIInt rank; 5461 PetscInt issize,isidx; 5462 5463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5464 if (im_active) { 5465 issize = 1; 5466 isidx = (PetscInt)rank; 5467 } else { 5468 issize = 0; 5469 isidx = -1; 5470 } 5471 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5472 } else { /* get a suitable subassembling pattern from MATIS code */ 5473 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5474 } 5475 5476 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5477 if (!redist || ncoarse <= void_procs) { 5478 PetscInt ncoarse_cand,tissize,*nisindices; 5479 PetscInt *coarse_candidates; 5480 const PetscInt* tisindices; 5481 5482 /* get coarse candidates' ranks in pc communicator */ 5483 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5484 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5485 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5486 if (!coarse_candidates[i]) { 5487 coarse_candidates[ncoarse_cand++]=i; 5488 } 5489 } 5490 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5491 5492 5493 if (pcbddc->dbg_flag) { 5494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5495 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5496 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5497 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5498 for (i=0;i<ncoarse_cand;i++) { 5499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5500 } 5501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5503 } 5504 /* shift the pattern on coarse candidates */ 5505 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5506 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5507 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5508 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5509 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5510 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5511 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5512 } 5513 if (pcbddc->dbg_flag) { 5514 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5516 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5517 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5518 } 5519 } 5520 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5521 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5522 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5523 } else { /* this is the last level, so use just receiving processes in subcomm */ 5524 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5525 } 5526 } else { 5527 if (pcbddc->dbg_flag) { 5528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5531 } 5532 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5533 coarse_mat_is = t_coarse_mat_is; 5534 } 5535 5536 /* create local to global scatters for coarse problem */ 5537 if (compute_vecs) { 5538 PetscInt lrows; 5539 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5540 if (coarse_mat_is) { 5541 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5542 } else { 5543 lrows = 0; 5544 } 5545 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5546 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5547 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5548 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5549 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5550 } 5551 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5552 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5553 5554 /* set defaults for coarse KSP and PC */ 5555 if (multilevel_allowed) { 5556 coarse_ksp_type = KSPRICHARDSON; 5557 coarse_pc_type = PCBDDC; 5558 } else { 5559 coarse_ksp_type = KSPPREONLY; 5560 coarse_pc_type = PCREDUNDANT; 5561 } 5562 5563 /* print some info if requested */ 5564 if (pcbddc->dbg_flag) { 5565 if (!multilevel_allowed) { 5566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5567 if (multilevel_requested) { 5568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5569 } else if (pcbddc->max_levels) { 5570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5571 } 5572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5573 } 5574 } 5575 5576 /* create the coarse KSP object only once with defaults */ 5577 if (coarse_mat_is) { 5578 MatReuse coarse_mat_reuse; 5579 PetscViewer dbg_viewer = NULL; 5580 if (pcbddc->dbg_flag) { 5581 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5582 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5583 } 5584 if (!pcbddc->coarse_ksp) { 5585 char prefix[256],str_level[16]; 5586 size_t len; 5587 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5588 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5589 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5590 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5591 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5592 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5593 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5594 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5595 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5596 /* prefix */ 5597 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5598 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5599 if (!pcbddc->current_level) { 5600 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5601 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5602 } else { 5603 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5604 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5605 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5606 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5607 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5608 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5609 } 5610 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5611 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5612 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5613 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5614 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5615 /* allow user customization */ 5616 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5617 } 5618 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5619 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5620 if (nisdofs) { 5621 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5622 for (i=0;i<nisdofs;i++) { 5623 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5624 } 5625 } 5626 if (nisneu) { 5627 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5628 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5629 } 5630 if (nisvert) { 5631 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5632 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5633 } 5634 5635 /* get some info after set from options */ 5636 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5637 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5638 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5639 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5640 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5641 isbddc = PETSC_FALSE; 5642 } 5643 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5644 if (isredundant) { 5645 KSP inner_ksp; 5646 PC inner_pc; 5647 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5648 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5649 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5650 } 5651 5652 /* assemble coarse matrix */ 5653 if (coarse_reuse) { 5654 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5655 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5656 coarse_mat_reuse = MAT_REUSE_MATRIX; 5657 } else { 5658 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5659 } 5660 if (isbddc || isnn) { 5661 if (isbddc) { /* currently there's no API for this */ 5662 PC_BDDC* pcbddc = (PC_BDDC*)pc_temp->data; 5663 pcbddc->detect_disconnected = PETSC_TRUE; 5664 } 5665 if (pcbddc->coarsening_ratio > 1) { 5666 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5667 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5668 if (pcbddc->dbg_flag) { 5669 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5670 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5671 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5672 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5673 } 5674 } 5675 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5676 } else { 5677 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5678 coarse_mat = coarse_mat_is; 5679 } 5680 } else { 5681 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5682 } 5683 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5684 5685 /* propagate symmetry info of coarse matrix */ 5686 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5687 if (pc->pmat->symmetric_set) { 5688 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5689 } 5690 if (pc->pmat->hermitian_set) { 5691 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5692 } 5693 if (pc->pmat->spd_set) { 5694 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5695 } 5696 /* set operators */ 5697 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5698 if (pcbddc->dbg_flag) { 5699 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5700 } 5701 } else { /* processes non partecipating to coarse solver (if any) */ 5702 coarse_mat = 0; 5703 } 5704 ierr = PetscFree(isarray);CHKERRQ(ierr); 5705 #if 0 5706 { 5707 PetscViewer viewer; 5708 char filename[256]; 5709 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5710 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5711 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5712 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5713 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5714 } 5715 #endif 5716 5717 /* Compute coarse null space (special handling by BDDC only) */ 5718 #if 0 5719 if (pcbddc->NullSpace) { 5720 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5721 } 5722 #endif 5723 /* hack */ 5724 if (pcbddc->coarse_ksp) { 5725 Vec crhs,csol; 5726 5727 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5728 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5729 if (!csol) { 5730 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5731 } 5732 if (!crhs) { 5733 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5734 } 5735 } 5736 5737 /* compute null space for coarse solver if the benign trick has been requested */ 5738 if (pcbddc->benign_null) { 5739 5740 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5741 for (i=0;i<pcbddc->benign_n;i++) { 5742 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5743 } 5744 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5745 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5746 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5747 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5748 if (coarse_mat) { 5749 Vec nullv; 5750 PetscScalar *array,*array2; 5751 PetscInt nl; 5752 5753 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5754 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5755 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5756 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5757 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5758 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5759 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5760 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5761 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5762 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5763 } 5764 } 5765 5766 if (pcbddc->coarse_ksp) { 5767 PetscBool ispreonly; 5768 5769 if (CoarseNullSpace) { 5770 PetscBool isnull; 5771 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5772 if (0) { 5773 if (isbddc && !pcbddc->benign_saddle_point) { 5774 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5775 } else { 5776 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5777 } 5778 } else { 5779 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5780 } 5781 } 5782 /* setup coarse ksp */ 5783 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5784 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5785 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5786 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5787 KSP check_ksp; 5788 KSPType check_ksp_type; 5789 PC check_pc; 5790 Vec check_vec,coarse_vec; 5791 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5792 PetscInt its; 5793 PetscBool compute_eigs; 5794 PetscReal *eigs_r,*eigs_c; 5795 PetscInt neigs; 5796 const char *prefix; 5797 5798 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5799 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5800 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5801 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5802 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5803 if (ispreonly) { 5804 check_ksp_type = KSPPREONLY; 5805 compute_eigs = PETSC_FALSE; 5806 } else { 5807 check_ksp_type = KSPGMRES; 5808 compute_eigs = PETSC_TRUE; 5809 } 5810 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5811 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5812 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5813 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5814 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5815 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5816 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5817 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5818 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5819 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5820 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5821 /* create random vec */ 5822 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5823 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5824 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5825 if (CoarseNullSpace) { 5826 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5827 } 5828 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5829 /* solve coarse problem */ 5830 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5831 if (CoarseNullSpace) { 5832 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5833 } 5834 /* set eigenvalue estimation if preonly has not been requested */ 5835 if (compute_eigs) { 5836 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5837 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5838 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5839 lambda_max = eigs_r[neigs-1]; 5840 lambda_min = eigs_r[0]; 5841 if (pcbddc->use_coarse_estimates) { 5842 if (lambda_max>lambda_min) { 5843 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5844 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5845 } 5846 } 5847 } 5848 5849 /* check coarse problem residual error */ 5850 if (pcbddc->dbg_flag) { 5851 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5852 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5853 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5854 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5855 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5856 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5857 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5858 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5859 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5860 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5861 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5863 if (CoarseNullSpace) { 5864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5865 } 5866 if (compute_eigs) { 5867 PetscReal lambda_max_s,lambda_min_s; 5868 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5869 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5870 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5871 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5872 for (i=0;i<neigs;i++) { 5873 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5874 } 5875 } 5876 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5877 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5878 } 5879 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5880 if (compute_eigs) { 5881 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5882 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5883 } 5884 } 5885 } 5886 /* print additional info */ 5887 if (pcbddc->dbg_flag) { 5888 /* waits until all processes reaches this point */ 5889 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5892 } 5893 5894 /* free memory */ 5895 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5896 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5897 PetscFunctionReturn(0); 5898 } 5899 5900 #undef __FUNCT__ 5901 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5902 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5903 { 5904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5905 PC_IS* pcis = (PC_IS*)pc->data; 5906 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5907 IS subset,subset_mult,subset_n; 5908 PetscInt local_size,coarse_size=0; 5909 PetscInt *local_primal_indices=NULL; 5910 const PetscInt *t_local_primal_indices; 5911 PetscErrorCode ierr; 5912 5913 PetscFunctionBegin; 5914 /* Compute global number of coarse dofs */ 5915 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5917 } 5918 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5919 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5920 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5921 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5922 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5923 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5924 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5925 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5926 if (local_size != pcbddc->local_primal_size) { 5927 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5928 } 5929 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5930 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5931 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5932 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5933 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5934 5935 /* check numbering */ 5936 if (pcbddc->dbg_flag) { 5937 PetscScalar coarsesum,*array,*array2; 5938 PetscInt i; 5939 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5940 5941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5944 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5945 /* counter */ 5946 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5947 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5948 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5949 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5950 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5951 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5952 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5953 for (i=0;i<pcbddc->local_primal_size;i++) { 5954 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5955 } 5956 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5957 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5958 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5959 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5960 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5961 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5962 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5963 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5964 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5965 for (i=0;i<pcis->n;i++) { 5966 if (array[i] != 0.0 && array[i] != array2[i]) { 5967 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5968 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5969 set_error = PETSC_TRUE; 5970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5971 } 5972 } 5973 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5974 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5976 for (i=0;i<pcis->n;i++) { 5977 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5978 } 5979 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5980 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5981 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5982 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5983 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5985 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5986 PetscInt *gidxs; 5987 5988 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5989 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5993 for (i=0;i<pcbddc->local_primal_size;i++) { 5994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5995 } 5996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5997 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5998 } 5999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6000 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6001 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6002 } 6003 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6004 /* get back data */ 6005 *coarse_size_n = coarse_size; 6006 *local_primal_indices_n = local_primal_indices; 6007 PetscFunctionReturn(0); 6008 } 6009 6010 #undef __FUNCT__ 6011 #define __FUNCT__ "PCBDDCGlobalToLocal" 6012 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6013 { 6014 IS localis_t; 6015 PetscInt i,lsize,*idxs,n; 6016 PetscScalar *vals; 6017 PetscErrorCode ierr; 6018 6019 PetscFunctionBegin; 6020 /* get indices in local ordering exploiting local to global map */ 6021 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6022 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6023 for (i=0;i<lsize;i++) vals[i] = 1.0; 6024 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6025 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6026 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6027 if (idxs) { /* multilevel guard */ 6028 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6029 } 6030 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6031 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6032 ierr = PetscFree(vals);CHKERRQ(ierr); 6033 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6034 /* now compute set in local ordering */ 6035 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6036 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6037 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6038 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6039 for (i=0,lsize=0;i<n;i++) { 6040 if (PetscRealPart(vals[i]) > 0.5) { 6041 lsize++; 6042 } 6043 } 6044 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6045 for (i=0,lsize=0;i<n;i++) { 6046 if (PetscRealPart(vals[i]) > 0.5) { 6047 idxs[lsize++] = i; 6048 } 6049 } 6050 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6051 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6052 *localis = localis_t; 6053 PetscFunctionReturn(0); 6054 } 6055 6056 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6057 #undef __FUNCT__ 6058 #define __FUNCT__ "PCBDDCMatMult_Private" 6059 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6060 { 6061 PCBDDCChange_ctx change_ctx; 6062 PetscErrorCode ierr; 6063 6064 PetscFunctionBegin; 6065 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6066 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6067 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6068 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6069 PetscFunctionReturn(0); 6070 } 6071 6072 #undef __FUNCT__ 6073 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6074 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6075 { 6076 PCBDDCChange_ctx change_ctx; 6077 PetscErrorCode ierr; 6078 6079 PetscFunctionBegin; 6080 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6081 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6082 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6083 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6084 PetscFunctionReturn(0); 6085 } 6086 6087 #undef __FUNCT__ 6088 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6089 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6090 { 6091 PC_IS *pcis=(PC_IS*)pc->data; 6092 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6093 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6094 Mat S_j; 6095 PetscInt *used_xadj,*used_adjncy; 6096 PetscBool free_used_adj; 6097 PetscErrorCode ierr; 6098 6099 PetscFunctionBegin; 6100 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6101 free_used_adj = PETSC_FALSE; 6102 if (pcbddc->sub_schurs_layers == -1) { 6103 used_xadj = NULL; 6104 used_adjncy = NULL; 6105 } else { 6106 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6107 used_xadj = pcbddc->mat_graph->xadj; 6108 used_adjncy = pcbddc->mat_graph->adjncy; 6109 } else if (pcbddc->computed_rowadj) { 6110 used_xadj = pcbddc->mat_graph->xadj; 6111 used_adjncy = pcbddc->mat_graph->adjncy; 6112 } else { 6113 PetscBool flg_row=PETSC_FALSE; 6114 const PetscInt *xadj,*adjncy; 6115 PetscInt nvtxs; 6116 6117 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6118 if (flg_row) { 6119 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6120 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6121 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6122 free_used_adj = PETSC_TRUE; 6123 } else { 6124 pcbddc->sub_schurs_layers = -1; 6125 used_xadj = NULL; 6126 used_adjncy = NULL; 6127 } 6128 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6129 } 6130 } 6131 6132 /* setup sub_schurs data */ 6133 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6134 if (!sub_schurs->schur_explicit) { 6135 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6136 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6137 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 6138 } else { 6139 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6140 PetscBool isseqaij; 6141 PetscInt benign_n; 6142 6143 if (!pcbddc->use_vertices && reuse_solvers) { 6144 PetscInt n_vertices; 6145 6146 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6147 reuse_solvers = (PetscBool)!n_vertices; 6148 } 6149 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6150 if (!isseqaij) { 6151 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6152 if (matis->A == pcbddc->local_mat) { 6153 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6154 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6155 } else { 6156 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6157 } 6158 } 6159 if (!pcbddc->benign_change_explicit) { 6160 benign_n = pcbddc->benign_n; 6161 } else { 6162 benign_n = 0; 6163 } 6164 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 6165 /* if the velocities are in the original basis, then we need to eliminate the no-net-flux from the S_Ej 6166 we assume that the user has passed in the associated quadrature weights in the near-null space attached to pc->pmat 6167 We need to compute the change of basis according to the quadrature weights, and this could not be done, at the moment, without some hacking */ 6168 if (pcbddc->benign_saddle_point && !pcbddc->use_change_of_basis && !pcbddc->user_ChangeOfBasisMatrix) { 6169 PC_IS *pcisf; 6170 PC_BDDC *pcbddcf; 6171 PC pcf; 6172 6173 if (pcbddc->sub_schurs_rebuild) { 6174 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6175 } 6176 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6177 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6178 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6179 /* hacks */ 6180 pcisf = (PC_IS*)pcf->data; 6181 pcbddcf = (PC_BDDC*)pcf->data; 6182 *pcisf = *pcis; 6183 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6184 pcbddcf->mat_graph = pcbddc->mat_graph; 6185 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6186 pcbddcf->sub_schurs = sub_schurs; 6187 pcbddcf->use_faces = PETSC_TRUE; 6188 pcbddcf->use_change_of_basis = PETSC_TRUE; 6189 pcbddcf->use_change_on_faces = PETSC_TRUE; 6190 pcbddcf->use_deluxe_scaling = PETSC_TRUE; 6191 pcbddcf->fake_change = PETSC_TRUE; 6192 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6193 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6194 pcbddcf->sub_schurs = NULL; 6195 ierr = MatDestroy(&pcbddcf->ConstraintMatrix);CHKERRQ(ierr); 6196 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6197 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6198 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6199 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6200 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6201 pcf->ops->destroy = NULL; 6202 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6203 } 6204 } 6205 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6206 6207 /* free adjacency */ 6208 if (free_used_adj) { 6209 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6210 } 6211 PetscFunctionReturn(0); 6212 } 6213 6214 #undef __FUNCT__ 6215 #define __FUNCT__ "PCBDDCInitSubSchurs" 6216 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6217 { 6218 PC_IS *pcis=(PC_IS*)pc->data; 6219 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6220 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6221 PCBDDCGraph graph; 6222 PetscErrorCode ierr; 6223 6224 PetscFunctionBegin; 6225 /* attach interface graph for determining subsets */ 6226 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6227 IS verticesIS,verticescomm; 6228 PetscInt vsize,*idxs; 6229 6230 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6231 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6232 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6233 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6234 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6235 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6236 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6237 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6238 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6239 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6240 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6241 /* 6242 if (pcbddc->dbg_flag) { 6243 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6244 } 6245 */ 6246 } else { 6247 graph = pcbddc->mat_graph; 6248 } 6249 6250 /* sub_schurs init */ 6251 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6252 6253 /* free graph struct */ 6254 if (pcbddc->sub_schurs_rebuild) { 6255 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6256 } 6257 PetscFunctionReturn(0); 6258 } 6259 6260 #undef __FUNCT__ 6261 #define __FUNCT__ "PCBDDCCheckOperator" 6262 PetscErrorCode PCBDDCCheckOperator(PC pc) 6263 { 6264 PC_IS *pcis=(PC_IS*)pc->data; 6265 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6266 PetscErrorCode ierr; 6267 6268 PetscFunctionBegin; 6269 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6270 IS zerodiag = NULL; 6271 Mat S_j,B0_B=NULL; 6272 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6273 PetscScalar *p0_check,*array,*array2; 6274 PetscReal norm; 6275 PetscInt i; 6276 6277 /* B0 and B0_B */ 6278 if (zerodiag) { 6279 IS dummy; 6280 6281 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6282 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6283 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6284 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6285 } 6286 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6287 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6288 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6289 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6290 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6291 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6292 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6293 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6294 /* S_j */ 6295 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6296 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6297 6298 /* mimic vector in \widetilde{W}_\Gamma */ 6299 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6300 /* continuous in primal space */ 6301 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6302 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6303 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6304 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6305 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6306 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6307 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6308 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6309 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6310 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6311 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6312 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6313 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6314 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6315 6316 /* assemble rhs for coarse problem */ 6317 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6318 /* local with Schur */ 6319 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6320 if (zerodiag) { 6321 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6322 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6323 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6324 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6325 } 6326 /* sum on primal nodes the local contributions */ 6327 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6328 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6329 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6330 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6331 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6332 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6333 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6334 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6335 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6336 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6337 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6338 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6339 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6340 /* scale primal nodes (BDDC sums contibutions) */ 6341 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6342 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6343 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6344 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6345 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6346 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6347 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6348 /* global: \widetilde{B0}_B w_\Gamma */ 6349 if (zerodiag) { 6350 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6351 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6352 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6353 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6354 } 6355 /* BDDC */ 6356 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6357 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6358 6359 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6360 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6361 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6362 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6363 for (i=0;i<pcbddc->benign_n;i++) { 6364 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6365 } 6366 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6367 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6368 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6369 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6370 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6371 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6372 } 6373 PetscFunctionReturn(0); 6374 } 6375