1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 11 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 12 { 13 PCBDDCBenignMatMult_ctx ctx; 14 PetscErrorCode ierr; 15 PetscBool apply_right,apply_left,reset_x; 16 17 PetscFunctionBegin; 18 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 19 if (transpose) { 20 apply_right = ctx->apply_left; 21 apply_left = ctx->apply_right; 22 } else { 23 apply_right = ctx->apply_right; 24 apply_left = ctx->apply_left; 25 } 26 reset_x = PETSC_FALSE; 27 if (apply_right) { 28 const PetscScalar *ax; 29 PetscInt nl,i; 30 31 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 32 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 33 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 34 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 35 for (i=0;i<ctx->benign_n;i++) { 36 PetscScalar sum,val; 37 const PetscInt *idxs; 38 PetscInt nz,j; 39 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 40 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 41 sum = 0.; 42 if (ctx->apply_p0) { 43 val = ctx->work[idxs[nz-1]]; 44 for (j=0;j<nz-1;j++) { 45 sum += ctx->work[idxs[j]]; 46 ctx->work[idxs[j]] += val; 47 } 48 } else { 49 for (j=0;j<nz-1;j++) { 50 sum += ctx->work[idxs[j]]; 51 } 52 } 53 ctx->work[idxs[nz-1]] -= sum; 54 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 55 } 56 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 57 reset_x = PETSC_TRUE; 58 } 59 if (transpose) { 60 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 61 } else { 62 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 63 } 64 if (reset_x) { 65 ierr = VecResetArray(x);CHKERRQ(ierr); 66 } 67 if (apply_left) { 68 PetscScalar *ay; 69 PetscInt i; 70 71 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 72 for (i=0;i<ctx->benign_n;i++) { 73 PetscScalar sum,val; 74 const PetscInt *idxs; 75 PetscInt nz,j; 76 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 77 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 78 val = -ay[idxs[nz-1]]; 79 if (ctx->apply_p0) { 80 sum = 0.; 81 for (j=0;j<nz-1;j++) { 82 sum += ay[idxs[j]]; 83 ay[idxs[j]] += val; 84 } 85 ay[idxs[nz-1]] += sum; 86 } else { 87 for (j=0;j<nz-1;j++) { 88 ay[idxs[j]] += val; 89 } 90 ay[idxs[nz-1]] = 0.; 91 } 92 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 93 } 94 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 } 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 101 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 102 { 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 112 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "PCBDDCBenignShellMat" 123 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 124 { 125 PC_IS *pcis = (PC_IS*)pc->data; 126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 127 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 128 PCBDDCBenignMatMult_ctx ctx; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 if (!restore) { 133 Mat A,A_IB,A_BI; 134 PetscScalar *work; 135 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 136 137 /* local mat */ 138 ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 139 ierr = MatSetSizes(A,pcis->n,pcis->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 140 ierr = MatSetType(A,MATSHELL);CHKERRQ(ierr); 141 ierr = MatShellSetOperation(A,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 142 ierr = MatShellSetOperation(A,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 143 ierr = PetscNew(&ctx);CHKERRQ(ierr); 144 ierr = MatShellSetContext(A,ctx);CHKERRQ(ierr); 145 ctx->apply_left = PETSC_TRUE; 146 ctx->apply_right = PETSC_TRUE; 147 ctx->apply_p0 = PETSC_TRUE; 148 ctx->benign_n = pcbddc->benign_n; 149 ctx->benign_zerodiag_subs = pcbddc->benign_zerodiag_subs; 150 ctx->A = matis->A; 151 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 152 ctx->work = work; 153 ierr = MatSetUp(A);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 matis->A = A; 157 158 /* A_IB */ 159 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 160 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 161 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 162 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 163 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 164 ierr = PetscNew(&ctx);CHKERRQ(ierr); 165 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 166 ctx->apply_left = PETSC_TRUE; 167 ctx->apply_right = PETSC_FALSE; 168 ctx->apply_p0 = PETSC_FALSE; 169 ctx->benign_n = pcbddc->benign_n; 170 if (reuse) { 171 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 172 } else { /* TODO: could be optimized for successive solves */ 173 ISLocalToGlobalMapping N_to_D; 174 PetscInt i; 175 176 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 177 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 178 for (i=0;i<pcbddc->benign_n;i++) { 179 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 180 } 181 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 182 } 183 ctx->A = pcis->A_IB; 184 ctx->work = work; 185 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 186 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 pcis->A_IB = A_IB; 189 190 /* A_BI as A_IB^T */ 191 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 192 pcbddc->benign_original_mat = pcis->A_BI; 193 pcis->A_BI = A_BI; 194 } else { 195 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 196 197 ierr = MatShellGetContext(matis->A,&ctx);CHKERRQ(ierr); 198 ierr = MatDestroy(&matis->A);CHKERRQ(ierr); 199 matis->A = ctx->A; 200 ierr = PetscFree(ctx);CHKERRQ(ierr); 201 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 202 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 203 pcis->A_IB = ctx->A; 204 if (!reuse) { 205 PetscInt i; 206 for (i=0;i<pcbddc->benign_n;i++) { 207 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 208 } 209 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 210 } 211 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 212 ierr = PetscFree(ctx);CHKERRQ(ierr); 213 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 214 pcis->A_BI = pcbddc->benign_original_mat; 215 pcbddc->benign_original_mat = NULL; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* used just in bddc debug mode */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "PCBDDCBenignProject" 223 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 224 { 225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 226 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 227 Mat An; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 232 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 233 if (is1) { 234 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 235 ierr = MatDestroy(&An);CHKERRQ(ierr); 236 } else { 237 *B = An; 238 } 239 PetscFunctionReturn(0); 240 } 241 242 /* TODO: add reuse flag */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSeqAIJCompress" 245 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 246 { 247 Mat Bt; 248 PetscScalar *a,*bdata; 249 const PetscInt *ii,*ij; 250 PetscInt m,n,i,nnz,*bii,*bij; 251 PetscBool flg_row; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 256 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 257 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 258 nnz = n; 259 for (i=0;i<ii[n];i++) { 260 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 261 } 262 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 263 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 265 nnz = 0; 266 bii[0] = 0; 267 for (i=0;i<n;i++) { 268 PetscInt j; 269 for (j=ii[i];j<ii[i+1];j++) { 270 PetscScalar entry = a[j]; 271 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 272 bij[nnz] = ij[j]; 273 bdata[nnz] = entry; 274 nnz++; 275 } 276 } 277 bii[i+1] = nnz; 278 } 279 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 280 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 281 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 282 { 283 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 284 b->free_a = PETSC_TRUE; 285 b->free_ij = PETSC_TRUE; 286 } 287 *B = Bt; 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDetectDisconnectedComponents" 293 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 294 { 295 Mat B; 296 IS is_dummy,*cc_n; 297 ISLocalToGlobalMapping l2gmap_dummy; 298 PCBDDCGraph graph; 299 PetscInt i,n; 300 PetscInt *xadj,*adjncy; 301 PetscInt *xadj_filtered,*adjncy_filtered; 302 PetscBool flg_row,isseqaij; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!A->rmap->N || !A->cmap->N) { 307 *ncc = 0; 308 *cc = NULL; 309 PetscFunctionReturn(0); 310 } 311 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 312 if (!isseqaij && filter) { 313 PetscBool isseqdense; 314 315 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 316 if (!isseqdense) { 317 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 318 } else { /* TODO: rectangular case and LDA */ 319 PetscScalar *array; 320 PetscReal chop=1.e-6; 321 322 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 323 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 324 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 325 for (i=0;i<n;i++) { 326 PetscInt j; 327 for (j=i+1;j<n;j++) { 328 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 329 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 330 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 331 } 332 } 333 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 334 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 335 } 336 } else { 337 B = A; 338 } 339 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 340 341 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 342 if (filter) { 343 PetscScalar *data; 344 PetscInt j,cum; 345 346 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 347 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 348 cum = 0; 349 for (i=0;i<n;i++) { 350 PetscInt t; 351 352 for (j=xadj[i];j<xadj[i+1];j++) { 353 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 354 continue; 355 } 356 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 357 } 358 t = xadj_filtered[i]; 359 xadj_filtered[i] = cum; 360 cum += t; 361 } 362 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 363 } else { 364 xadj_filtered = NULL; 365 adjncy_filtered = NULL; 366 } 367 368 /* compute local connected components using PCBDDCGraph */ 369 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 371 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 373 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 374 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 375 if (xadj_filtered) { 376 graph->xadj = xadj_filtered; 377 graph->adjncy = adjncy_filtered; 378 } else { 379 graph->xadj = xadj; 380 graph->adjncy = adjncy; 381 } 382 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 383 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 384 /* partial clean up */ 385 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 386 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 387 if (A != B) { 388 ierr = MatDestroy(&B);CHKERRQ(ierr); 389 } 390 391 /* get back data */ 392 if (ncc) *ncc = graph->ncc; 393 if (cc) { 394 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 395 for (i=0;i<graph->ncc;i++) { 396 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 397 } 398 *cc = cc_n; 399 } 400 /* clean up graph */ 401 graph->xadj = 0; 402 graph->adjncy = 0; 403 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCBDDCBenignCheck" 409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 410 { 411 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 412 PC_IS* pcis = (PC_IS*)(pc->data); 413 IS dirIS = NULL; 414 PetscInt i; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 419 if (zerodiag) { 420 Mat A; 421 Vec vec3_N; 422 PetscScalar *vals; 423 const PetscInt *idxs; 424 PetscInt nz,*count; 425 426 /* p0 */ 427 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 428 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 429 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 430 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 431 for (i=0;i<nz;i++) vals[i] = 1.; 432 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* v_I */ 436 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 437 for (i=0;i<nz;i++) vals[i] = 0.; 438 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 440 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 441 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 442 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 444 if (dirIS) { 445 PetscInt n; 446 447 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 448 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 449 for (i=0;i<n;i++) vals[i] = 0.; 450 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 451 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 452 } 453 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 456 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 457 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 458 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 459 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 460 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 462 } 463 ierr = PetscFree(vals);CHKERRQ(ierr); 464 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 465 466 /* there should not be any pressure dofs lying on the interface */ 467 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 468 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 469 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 470 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 471 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 472 for (i=0;i<nz;i++) { 473 if (count[idxs[i]]) { 474 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 475 } 476 } 477 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 478 ierr = PetscFree(count);CHKERRQ(ierr); 479 } 480 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 481 482 /* check PCBDDCBenignGetOrSetP0 */ 483 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 484 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 485 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 486 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 487 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 488 for (i=0;i<pcbddc->benign_n;i++) { 489 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 490 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 498 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 499 { 500 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 501 IS pressures,zerodiag,*zerodiag_subs; 502 PetscInt nz,n; 503 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 508 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 510 for (n=0;n<pcbddc->benign_n;n++) { 511 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 512 } 513 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 514 pcbddc->benign_n = 0; 515 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 516 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 517 Checks if all the pressure dofs in each subdomain have a zero diagonal 518 If not, a change of basis on pressures is not needed 519 since the local Schur complements are SPD 520 */ 521 has_null_pressures = PETSC_TRUE; 522 have_null = PETSC_TRUE; 523 if (pcbddc->n_ISForDofsLocal) { 524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 525 526 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 527 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 528 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 529 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 531 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 532 if (!sorted) { 533 ierr = ISSort(pressures);CHKERRQ(ierr); 534 } 535 } else { 536 pressures = NULL; 537 } 538 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 539 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 540 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 541 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 542 if (!sorted) { 543 ierr = ISSort(zerodiag);CHKERRQ(ierr); 544 } 545 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 546 if (!nz) { 547 if (n) have_null = PETSC_FALSE; 548 has_null_pressures = PETSC_FALSE; 549 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 550 } 551 recompute_zerodiag = PETSC_FALSE; 552 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 553 zerodiag_subs = NULL; 554 pcbddc->benign_n = 0; 555 if (has_null_pressures) { 556 IS *subs; 557 PetscInt nsubs,i; 558 559 subs = pcbddc->local_subs; 560 nsubs = pcbddc->n_local_subs; 561 if (nsubs > 1) { 562 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 563 for (i=0;i<nsubs;i++) { 564 ISLocalToGlobalMapping l2g; 565 IS t_zerodiag_subs; 566 PetscInt nl; 567 568 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 569 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 571 if (nl) { 572 PetscBool valid = PETSC_TRUE; 573 574 if (pressures) { 575 IS t_pressure_subs; 576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 577 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 578 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 579 } 580 if (valid) { 581 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 582 pcbddc->benign_n++; 583 } else { 584 recompute_zerodiag = PETSC_TRUE; 585 } 586 } 587 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 589 } 590 } else { /* there's just one subdomain (or zero if they have not been detected */ 591 PetscBool valid = PETSC_TRUE; 592 if (pressures) { 593 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 594 } 595 if (valid) { 596 pcbddc->benign_n = 1; 597 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 598 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 599 zerodiag_subs[0] = zerodiag; 600 } 601 } 602 } 603 604 if (!pcbddc->benign_n) { 605 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 606 recompute_zerodiag = PETSC_FALSE; 607 has_null_pressures = PETSC_FALSE; 608 have_null = PETSC_FALSE; 609 } 610 611 /* final check for null pressures */ 612 if (zerodiag && pressures) { 613 PetscInt nz,np; 614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 616 if (nz != np) have_null = PETSC_FALSE; 617 } 618 619 if (recompute_zerodiag) { 620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 621 if (pcbddc->benign_n == 1) { 622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 623 zerodiag = zerodiag_subs[0]; 624 } else { 625 PetscInt i,nzn,*new_idxs; 626 627 nzn = 0; 628 for (i=0;i<pcbddc->benign_n;i++) { 629 PetscInt ns; 630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 631 nzn += ns; 632 } 633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 634 nzn = 0; 635 for (i=0;i<pcbddc->benign_n;i++) { 636 PetscInt ns,*idxs; 637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 641 nzn += ns; 642 } 643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 645 } 646 have_null = PETSC_FALSE; 647 } 648 649 /* no-net-flux */ 650 if (pcbddc->benign_compute_nonetflux) { 651 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 652 MatNullSpace near_null_space; 653 ISLocalToGlobalMapping rmap; 654 Vec quad_vec; 655 PetscScalar *pvals; 656 PetscInt i,np,*dummyins; 657 658 /* create vector to hold quadrature weights */ 659 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 660 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 661 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 662 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 663 664 /* compute B^{(i)T} * 1_p */ 665 np = 0; 666 if (zerodiag) { 667 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 668 } else if (pressures) { 669 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 670 } 671 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 672 for (i=0;i<np;i++) pvals[i] = 1.; 673 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 674 if (np) { 675 const PetscInt *pidxs; 676 677 if (zerodiag) { 678 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 679 } else if (pressures) { 680 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 681 } 682 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 683 if (zerodiag) { 684 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 685 } else if (pressures) { 686 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 687 } 688 } 689 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 690 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 691 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 692 ierr = PetscFree(pvals);CHKERRQ(ierr); 693 694 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 695 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 696 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 697 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 698 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 699 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 700 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 701 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 702 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 703 for (i=0;i<matis->sf_nleaves;i++) { 704 if (PetscGlobalRank != matis->sf_leafdata[i]) { 705 pvals[i] = 0.; 706 } 707 } 708 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 709 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 710 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 711 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 712 ierr = PetscFree(dummyins);CHKERRQ(ierr); 713 714 /* assembly quadrature vec and attach near null space to pmat */ 715 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 716 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 717 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 718 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 719 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 720 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 721 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 722 } 723 724 /* change of basis and p0 dofs */ 725 if (has_null_pressures) { 726 IS zerodiagc; 727 const PetscInt *idxs,*idxsc; 728 PetscInt i,s,*nnz; 729 730 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 731 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 732 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 733 /* local change of basis for pressures */ 734 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 735 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 736 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 737 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 738 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 739 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 740 for (i=0;i<pcbddc->benign_n;i++) { 741 PetscInt nzs,j; 742 743 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 744 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 745 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 746 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 747 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 748 } 749 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 750 ierr = PetscFree(nnz);CHKERRQ(ierr); 751 /* set identity on velocities */ 752 for (i=0;i<n-nz;i++) { 753 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 754 } 755 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 756 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 757 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 758 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 759 /* set change on pressures */ 760 for (s=0;s<pcbddc->benign_n;s++) { 761 PetscScalar *array; 762 PetscInt nzs; 763 764 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 765 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 766 for (i=0;i<nzs-1;i++) { 767 PetscScalar vals[2]; 768 PetscInt cols[2]; 769 770 cols[0] = idxs[i]; 771 cols[1] = idxs[nzs-1]; 772 vals[0] = 1.; 773 vals[1] = 1.; 774 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 775 } 776 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 777 for (i=0;i<nzs-1;i++) array[i] = -1.; 778 array[nzs-1] = 1.; 779 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 780 /* store local idxs for p0 */ 781 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 782 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 783 ierr = PetscFree(array);CHKERRQ(ierr); 784 } 785 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 /* project if needed */ 788 if (pcbddc->benign_change_explicit) { 789 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 791 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 793 } 794 /* store global idxs for p0 */ 795 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 796 } 797 pcbddc->benign_zerodiag_subs = zerodiag_subs; 798 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 799 800 /* determines if the coarse solver will be singular or not */ 801 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 802 *zerodiaglocal = zerodiag; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 808 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 809 { 810 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 if (!pcbddc->benign_sf) { 815 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 816 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 817 } 818 if (get) { /* use SF to get values */ 819 PetscScalar *array; 820 821 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 822 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 823 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 824 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 825 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 826 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 828 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 835 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 836 { 837 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 838 PetscErrorCode ierr; 839 840 PetscFunctionBegin; 841 /* TODO: add error checking 842 - avoid nested pop (or push) calls. 843 - cannot push before pop. 844 - cannot call this if pcbddc->local_mat is NULL 845 */ 846 if (!pcbddc->benign_n) { 847 PetscFunctionReturn(0); 848 } 849 if (pop) { 850 if (pcbddc->benign_change_explicit) { 851 IS is_p0; 852 MatReuse reuse; 853 854 /* extract B_0 */ 855 reuse = MAT_INITIAL_MATRIX; 856 if (pcbddc->benign_B0) { 857 reuse = MAT_REUSE_MATRIX; 858 } 859 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 861 /* remove rows and cols from local problem */ 862 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 863 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 864 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 866 } else { 867 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 868 PetscScalar *vals; 869 PetscInt i,n,*idxs_ins; 870 871 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 872 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 873 if (!pcbddc->benign_B0) { 874 PetscInt *nnz; 875 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 876 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 877 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 878 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 879 for (i=0;i<pcbddc->benign_n;i++) { 880 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 881 nnz[i] = n - nnz[i]; 882 } 883 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 884 ierr = PetscFree(nnz);CHKERRQ(ierr); 885 } 886 887 for (i=0;i<pcbddc->benign_n;i++) { 888 PetscScalar *array; 889 PetscInt *idxs,j,nz,cum; 890 891 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 892 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 893 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 for (j=0;j<nz;j++) vals[j] = 1.; 895 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 896 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 897 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 898 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 899 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 900 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 901 cum = 0; 902 for (j=0;j<n;j++) { 903 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 904 vals[cum] = array[j]; 905 idxs_ins[cum] = j; 906 cum++; 907 } 908 } 909 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 916 } 917 } else { /* push */ 918 if (pcbddc->benign_change_explicit) { 919 PetscInt i; 920 921 for (i=0;i<pcbddc->benign_n;i++) { 922 PetscScalar *B0_vals; 923 PetscInt *B0_cols,B0_ncol; 924 925 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 926 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 927 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 928 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 929 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 930 } 931 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 } else { 934 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "PCBDDCAdaptiveSelection" 942 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 943 { 944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 945 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 946 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 947 PetscBLASInt *B_iwork,*B_ifail; 948 PetscScalar *work,lwork; 949 PetscScalar *St,*S,*eigv; 950 PetscScalar *Sarray,*Starray; 951 PetscReal *eigs,thresh; 952 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 953 PetscBool allocated_S_St; 954 #if defined(PETSC_USE_COMPLEX) 955 PetscReal *rwork; 956 #endif 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 if (!sub_schurs->schur_explicit) { 961 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 962 } 963 964 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 965 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 966 } 967 968 if (pcbddc->dbg_flag) { 969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 971 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 972 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 } 974 975 if (pcbddc->dbg_flag) { 976 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 977 } 978 979 /* max size of subsets */ 980 mss = 0; 981 for (i=0;i<sub_schurs->n_subs;i++) { 982 PetscInt subset_size; 983 984 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 985 mss = PetscMax(mss,subset_size); 986 } 987 988 /* min/max and threshold */ 989 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 990 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 991 nmax = PetscMax(nmin,nmax); 992 allocated_S_St = PETSC_FALSE; 993 if (nmin) { 994 allocated_S_St = PETSC_TRUE; 995 } 996 997 /* allocate lapack workspace */ 998 cum = cum2 = 0; 999 maxneigs = 0; 1000 for (i=0;i<sub_schurs->n_subs;i++) { 1001 PetscInt n,subset_size; 1002 1003 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1004 n = PetscMin(subset_size,nmax); 1005 cum += subset_size; 1006 cum2 += subset_size*n; 1007 maxneigs = PetscMax(maxneigs,n); 1008 } 1009 if (mss) { 1010 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1011 PetscBLASInt B_itype = 1; 1012 PetscBLASInt B_N = mss; 1013 PetscReal zero = 0.0; 1014 PetscReal eps = 0.0; /* dlamch? */ 1015 1016 B_lwork = -1; 1017 S = NULL; 1018 St = NULL; 1019 eigs = NULL; 1020 eigv = NULL; 1021 B_iwork = NULL; 1022 B_ifail = NULL; 1023 #if defined(PETSC_USE_COMPLEX) 1024 rwork = NULL; 1025 #endif 1026 thresh = 1.0; 1027 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1030 #else 1031 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1032 #endif 1033 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1035 } else { 1036 /* TODO */ 1037 } 1038 } else { 1039 lwork = 0; 1040 } 1041 1042 nv = 0; 1043 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1044 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1045 } 1046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1047 if (allocated_S_St) { 1048 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1049 } 1050 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1051 #if defined(PETSC_USE_COMPLEX) 1052 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1055 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1056 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1057 nv+cum,&pcbddc->adaptive_constraints_idxs, 1058 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1059 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1060 1061 maxneigs = 0; 1062 cum = cumarray = 0; 1063 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1064 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1065 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1066 const PetscInt *idxs; 1067 1068 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1069 for (cum=0;cum<nv;cum++) { 1070 pcbddc->adaptive_constraints_n[cum] = 1; 1071 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1072 pcbddc->adaptive_constraints_data[cum] = 1.0; 1073 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1074 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1075 } 1076 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1077 } 1078 1079 if (mss) { /* multilevel */ 1080 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1081 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1082 } 1083 1084 thresh = pcbddc->adaptive_threshold; 1085 for (i=0;i<sub_schurs->n_subs;i++) { 1086 const PetscInt *idxs; 1087 PetscReal infty = PETSC_MAX_REAL; 1088 PetscInt j,subset_size,eigs_start = 0; 1089 PetscBLASInt B_N; 1090 PetscBool same_data = PETSC_FALSE; 1091 1092 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1093 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1095 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1096 if (sub_schurs->is_hermitian) { 1097 PetscInt j,k; 1098 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1099 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1100 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1101 } 1102 for (j=0;j<subset_size;j++) { 1103 for (k=j;k<subset_size;k++) { 1104 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1105 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1106 } 1107 } 1108 } else { 1109 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1110 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1111 } 1112 } else { 1113 S = Sarray + cumarray; 1114 St = Starray + cumarray; 1115 } 1116 /* see if we can save some work */ 1117 if (sub_schurs->n_subs == 1) { 1118 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1119 } 1120 1121 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1122 B_neigs = 0; 1123 } else { 1124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1125 PetscBLASInt B_itype = 1; 1126 PetscBLASInt B_IL, B_IU; 1127 PetscReal eps = -1.0; /* dlamch? */ 1128 PetscInt nmin_s; 1129 1130 if (pcbddc->dbg_flag) { 1131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1132 } 1133 1134 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1135 if (thresh > 1.+PETSC_SMALL) { 1136 1137 /* ask for eigenvalues larger than thresh */ 1138 #if defined(PETSC_USE_COMPLEX) 1139 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1140 #else 1141 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1142 #endif 1143 } else { 1144 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1145 B_IL = 1; 1146 #if defined(PETSC_USE_COMPLEX) 1147 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1148 #else 1149 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1150 #endif 1151 } 1152 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1153 if (B_ierr) { 1154 if (B_ierr < 0 ) { 1155 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1156 } else if (B_ierr <= B_N) { 1157 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1158 } else { 1159 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1160 } 1161 } 1162 1163 if (B_neigs > nmax) { 1164 if (pcbddc->dbg_flag) { 1165 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1166 } 1167 eigs_start = B_neigs -nmax; 1168 B_neigs = nmax; 1169 } 1170 1171 nmin_s = PetscMin(nmin,B_N); 1172 if (B_neigs < nmin_s) { 1173 PetscBLASInt B_neigs2; 1174 1175 B_IU = B_N - B_neigs; 1176 B_IL = B_N - nmin_s + 1; 1177 if (pcbddc->dbg_flag) { 1178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1179 } 1180 if (sub_schurs->is_hermitian) { 1181 PetscInt j; 1182 for (j=0;j<subset_size;j++) { 1183 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1184 } 1185 for (j=0;j<subset_size;j++) { 1186 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1187 } 1188 } else { 1189 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1190 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1191 } 1192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1193 #if defined(PETSC_USE_COMPLEX) 1194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1195 #else 1196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1197 #endif 1198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1199 B_neigs += B_neigs2; 1200 } 1201 if (B_ierr) { 1202 if (B_ierr < 0 ) { 1203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1204 } else if (B_ierr <= B_N) { 1205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1206 } else { 1207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1208 } 1209 } 1210 if (pcbddc->dbg_flag) { 1211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1212 for (j=0;j<B_neigs;j++) { 1213 if (eigs[j] == 0.0) { 1214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1217 } 1218 } 1219 } 1220 } else { 1221 /* TODO */ 1222 } 1223 } 1224 /* change the basis back to the original one */ 1225 if (sub_schurs->change) { 1226 Mat change,phi,phit; 1227 1228 if (pcbddc->dbg_flag > 1) { 1229 PetscInt ii; 1230 for (ii=0;ii<B_neigs;ii++) { 1231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1232 for (j=0;j<B_N;j++) { 1233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1234 } 1235 } 1236 } 1237 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1239 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1240 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1241 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1242 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1243 } 1244 maxneigs = PetscMax(B_neigs,maxneigs); 1245 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1246 if (B_neigs) { 1247 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1248 1249 if (pcbddc->dbg_flag > 1) { 1250 PetscInt ii; 1251 for (ii=0;ii<B_neigs;ii++) { 1252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1253 for (j=0;j<B_N;j++) { 1254 #if defined(PETSC_USE_COMPLEX) 1255 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1256 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1258 #else 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1260 #endif 1261 } 1262 } 1263 } 1264 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1265 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1266 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1267 cum++; 1268 } 1269 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1270 /* shift for next computation */ 1271 cumarray += subset_size*subset_size; 1272 } 1273 if (pcbddc->dbg_flag) { 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 } 1276 1277 if (mss) { 1278 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1279 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1280 /* destroy matrices (junk) */ 1281 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1282 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1283 } 1284 if (allocated_S_St) { 1285 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1288 #if defined(PETSC_USE_COMPLEX) 1289 ierr = PetscFree(rwork);CHKERRQ(ierr); 1290 #endif 1291 if (pcbddc->dbg_flag) { 1292 PetscInt maxneigs_r; 1293 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "PCBDDCSetUpSolvers" 1301 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1302 { 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PetscScalar *coarse_submat_vals; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1309 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1310 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1311 1312 /* Setup local neumann solver ksp_R */ 1313 /* PCBDDCSetUpLocalScatters should be called first! */ 1314 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1315 1316 /* Change global null space passed in by the user if change of basis has been requested */ 1317 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1318 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1319 } 1320 1321 /* 1322 Setup local correction and local part of coarse basis. 1323 Gives back the dense local part of the coarse matrix in column major ordering 1324 */ 1325 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1326 1327 /* Compute total number of coarse nodes and setup coarse solver */ 1328 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1329 1330 /* free */ 1331 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "PCBDDCResetCustomization" 1337 PetscErrorCode PCBDDCResetCustomization(PC pc) 1338 { 1339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1344 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1345 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1346 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1348 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1349 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1350 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1351 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1352 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1353 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1354 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCResetTopography" 1360 PetscErrorCode PCBDDCResetTopography(PC pc) 1361 { 1362 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1363 PetscInt i; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1368 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1369 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1370 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1371 for (i=0;i<pcbddc->n_local_subs;i++) { 1372 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1375 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "PCBDDCResetSolvers" 1381 PetscErrorCode PCBDDCResetSolvers(PC pc) 1382 { 1383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1388 if (pcbddc->coarse_phi_B) { 1389 PetscScalar *array; 1390 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1391 ierr = PetscFree(array);CHKERRQ(ierr); 1392 } 1393 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1394 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1395 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1396 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1397 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1398 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1402 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1403 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1404 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1405 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1406 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1407 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1408 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1410 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1411 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1412 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1413 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1416 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1417 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1418 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1419 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1420 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1421 if (pcbddc->benign_zerodiag_subs) { 1422 PetscInt i; 1423 for (i=0;i<pcbddc->benign_n;i++) { 1424 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1434 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1435 { 1436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1437 PC_IS *pcis = (PC_IS*)pc->data; 1438 VecType impVecType; 1439 PetscInt n_constraints,n_R,old_size; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 if (!pcbddc->ConstraintMatrix) { 1444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1445 } 1446 /* get sizes */ 1447 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1448 n_R = pcis->n - pcbddc->n_vertices; 1449 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1450 /* local work vectors (try to avoid unneeded work)*/ 1451 /* R nodes */ 1452 old_size = -1; 1453 if (pcbddc->vec1_R) { 1454 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1455 } 1456 if (n_R != old_size) { 1457 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1458 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1459 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1460 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1461 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1462 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1463 } 1464 /* local primal dofs */ 1465 old_size = -1; 1466 if (pcbddc->vec1_P) { 1467 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1468 } 1469 if (pcbddc->local_primal_size != old_size) { 1470 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1471 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1472 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1473 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1474 } 1475 /* local explicit constraints */ 1476 old_size = -1; 1477 if (pcbddc->vec1_C) { 1478 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1479 } 1480 if (n_constraints && n_constraints != old_size) { 1481 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1483 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1484 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "PCBDDCSetUpCorrection" 1491 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1492 { 1493 PetscErrorCode ierr; 1494 /* pointers to pcis and pcbddc */ 1495 PC_IS* pcis = (PC_IS*)pc->data; 1496 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1497 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1498 /* submatrices of local problem */ 1499 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1500 /* submatrices of local coarse problem */ 1501 Mat S_VV,S_CV,S_VC,S_CC; 1502 /* working matrices */ 1503 Mat C_CR; 1504 /* additional working stuff */ 1505 PC pc_R; 1506 Mat F; 1507 Vec dummy_vec; 1508 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1509 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1510 PetscScalar *work; 1511 PetscInt *idx_V_B; 1512 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1513 PetscInt i,n_R,n_D,n_B; 1514 1515 /* some shortcuts to scalars */ 1516 PetscScalar one=1.0,m_one=-1.0; 1517 1518 PetscFunctionBegin; 1519 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1520 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1521 } 1522 1523 /* Set Non-overlapping dimensions */ 1524 n_vertices = pcbddc->n_vertices; 1525 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1526 n_B = pcis->n_B; 1527 n_D = pcis->n - n_B; 1528 n_R = pcis->n - n_vertices; 1529 1530 /* vertices in boundary numbering */ 1531 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1532 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1533 if (i != n_vertices) { 1534 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1535 } 1536 1537 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1538 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1540 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1544 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1545 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1546 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1547 1548 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1549 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1550 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1552 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1553 lda_rhs = n_R; 1554 need_benign_correction = PETSC_FALSE; 1555 if (isLU || isILU || isCHOL) { 1556 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1557 } else if (sub_schurs->reuse_solver) { 1558 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1559 MatFactorType type; 1560 1561 F = reuse_solver->F; 1562 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1563 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1564 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1565 need_benign_correction = !!reuse_solver->benign_n; 1566 } else { 1567 F = NULL; 1568 } 1569 1570 /* allocate workspace */ 1571 n = 0; 1572 if (n_constraints) { 1573 n += lda_rhs*n_constraints; 1574 } 1575 if (n_vertices) { 1576 n = PetscMax(2*lda_rhs*n_vertices,n); 1577 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1578 } 1579 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1580 1581 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1582 dummy_vec = NULL; 1583 if (need_benign_correction && lda_rhs != n_R && F) { 1584 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1585 } 1586 1587 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1588 if (n_constraints) { 1589 Mat M1,M2,M3,C_B; 1590 IS is_aux; 1591 PetscScalar *array,*array2; 1592 1593 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1594 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1595 1596 /* Extract constraints on R nodes: C_{CR} */ 1597 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1598 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1599 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1600 1601 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1602 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1603 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1604 for (i=0;i<n_constraints;i++) { 1605 const PetscScalar *row_cmat_values; 1606 const PetscInt *row_cmat_indices; 1607 PetscInt size_of_constraint,j; 1608 1609 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1610 for (j=0;j<size_of_constraint;j++) { 1611 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1612 } 1613 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1614 } 1615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1616 if (F) { 1617 Mat B; 1618 1619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1620 if (need_benign_correction) { 1621 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1622 1623 /* rhs is already zero on interior dofs, no need to change the rhs */ 1624 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1625 } 1626 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1627 if (need_benign_correction) { 1628 PetscScalar *marr; 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1632 if (lda_rhs != n_R) { 1633 for (i=0;i<n_constraints;i++) { 1634 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1635 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1636 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1637 } 1638 } else { 1639 for (i=0;i<n_constraints;i++) { 1640 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1641 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 } 1644 } 1645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1646 } 1647 ierr = MatDestroy(&B);CHKERRQ(ierr); 1648 } else { 1649 PetscScalar *marr; 1650 1651 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1652 for (i=0;i<n_constraints;i++) { 1653 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1655 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1656 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1658 } 1659 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 } 1661 if (!pcbddc->switch_static) { 1662 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1663 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1664 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1665 for (i=0;i<n_constraints;i++) { 1666 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1667 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1668 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1671 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1672 } 1673 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1674 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1675 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1676 } else { 1677 if (lda_rhs != n_R) { 1678 IS dummy; 1679 1680 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1681 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1682 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1683 } else { 1684 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1685 pcbddc->local_auxmat2 = local_auxmat2_R; 1686 } 1687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1688 } 1689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1694 if (isCHOL) { 1695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1696 } else { 1697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1698 } 1699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1705 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1706 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1709 } 1710 1711 /* Get submatrices from subdomain matrix */ 1712 if (n_vertices) { 1713 IS is_aux; 1714 1715 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1716 IS tis; 1717 1718 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1719 ierr = ISSort(tis);CHKERRQ(ierr); 1720 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1721 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1722 } else { 1723 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1724 } 1725 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1727 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1728 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1729 } 1730 1731 /* Matrix of coarse basis functions (local) */ 1732 if (pcbddc->coarse_phi_B) { 1733 PetscInt on_B,on_primal,on_D=n_D; 1734 if (pcbddc->coarse_phi_D) { 1735 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1736 } 1737 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1738 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1739 PetscScalar *marray; 1740 1741 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1742 ierr = PetscFree(marray);CHKERRQ(ierr); 1743 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1744 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1745 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1746 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1747 } 1748 } 1749 1750 if (!pcbddc->coarse_phi_B) { 1751 PetscScalar *marray; 1752 1753 n = n_B*pcbddc->local_primal_size; 1754 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1755 n += n_D*pcbddc->local_primal_size; 1756 } 1757 if (!pcbddc->symmetric_primal) { 1758 n *= 2; 1759 } 1760 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1762 n = n_B*pcbddc->local_primal_size; 1763 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1765 n += n_D*pcbddc->local_primal_size; 1766 } 1767 if (!pcbddc->symmetric_primal) { 1768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1770 n = n_B*pcbddc->local_primal_size; 1771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1772 } 1773 } else { 1774 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1775 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1776 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1777 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1778 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1779 } 1780 } 1781 } 1782 1783 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1784 p0_lidx_I = NULL; 1785 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1786 const PetscInt *idxs; 1787 1788 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1789 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1790 for (i=0;i<pcbddc->benign_n;i++) { 1791 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1792 } 1793 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1794 } 1795 1796 /* vertices */ 1797 if (n_vertices) { 1798 1799 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1800 1801 if (n_R) { 1802 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1803 PetscBLASInt B_N,B_one = 1; 1804 PetscScalar *x,*y; 1805 PetscBool isseqaij; 1806 1807 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1808 if (need_benign_correction) { 1809 ISLocalToGlobalMapping RtoN; 1810 IS is_p0; 1811 PetscInt *idxs_p0,n; 1812 1813 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1814 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1815 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1816 if (n != pcbddc->benign_n) { 1817 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1818 } 1819 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1820 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1821 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1822 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1823 } 1824 1825 if (lda_rhs == n_R) { 1826 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1827 } else { 1828 PetscScalar *av,*array; 1829 const PetscInt *xadj,*adjncy; 1830 PetscInt n; 1831 PetscBool flg_row; 1832 1833 array = work+lda_rhs*n_vertices; 1834 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1835 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1836 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1837 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1838 for (i=0;i<n;i++) { 1839 PetscInt j; 1840 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1841 } 1842 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1845 } 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1847 if (need_benign_correction) { 1848 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1849 PetscScalar *marr; 1850 1851 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1852 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1853 1854 | 0 0 0 | (V) 1855 L = | 0 0 -1 | (P-p0) 1856 | 0 0 -1 | (p0) 1857 1858 */ 1859 for (i=0;i<reuse_solver->benign_n;i++) { 1860 const PetscScalar *vals; 1861 const PetscInt *idxs,*idxs_zero; 1862 PetscInt n,j,nz; 1863 1864 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1865 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1866 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1867 for (j=0;j<n;j++) { 1868 PetscScalar val = vals[j]; 1869 PetscInt k,col = idxs[j]; 1870 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1871 } 1872 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1874 } 1875 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1876 } 1877 if (F) { 1878 /* need to correct the rhs */ 1879 if (need_benign_correction) { 1880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1881 PetscScalar *marr; 1882 1883 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1884 if (lda_rhs != n_R) { 1885 for (i=0;i<n_vertices;i++) { 1886 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1887 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1888 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1889 } 1890 } else { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1895 } 1896 } 1897 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1898 } 1899 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1900 /* need to correct the solution */ 1901 if (need_benign_correction) { 1902 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1903 PetscScalar *marr; 1904 1905 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1906 if (lda_rhs != n_R) { 1907 for (i=0;i<n_vertices;i++) { 1908 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1909 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1910 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1911 } 1912 } else { 1913 for (i=0;i<n_vertices;i++) { 1914 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1915 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1917 } 1918 } 1919 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1920 } 1921 } else { 1922 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1923 for (i=0;i<n_vertices;i++) { 1924 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1925 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1926 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1928 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1929 } 1930 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1931 } 1932 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1933 /* S_VV and S_CV */ 1934 if (n_constraints) { 1935 Mat B; 1936 1937 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1938 for (i=0;i<n_vertices;i++) { 1939 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1940 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1941 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1942 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1943 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1944 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1945 } 1946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1947 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1948 ierr = MatDestroy(&B);CHKERRQ(ierr); 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1950 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1951 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1953 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 } 1956 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1957 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1958 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1959 } 1960 if (lda_rhs != n_R) { 1961 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1963 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1964 } 1965 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1966 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1967 if (need_benign_correction) { 1968 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1969 PetscScalar *marr,*sums; 1970 1971 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1972 ierr = MatDenseGetArray(S_VVt,&marr); 1973 for (i=0;i<reuse_solver->benign_n;i++) { 1974 const PetscScalar *vals; 1975 const PetscInt *idxs,*idxs_zero; 1976 PetscInt n,j,nz; 1977 1978 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1979 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1980 for (j=0;j<n_vertices;j++) { 1981 PetscInt k; 1982 sums[j] = 0.; 1983 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1984 } 1985 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1986 for (j=0;j<n;j++) { 1987 PetscScalar val = vals[j]; 1988 PetscInt k; 1989 for (k=0;k<n_vertices;k++) { 1990 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1991 } 1992 } 1993 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1994 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1995 } 1996 ierr = PetscFree(sums);CHKERRQ(ierr); 1997 ierr = MatDenseRestoreArray(S_VVt,&marr); 1998 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1999 } 2000 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2001 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2002 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2003 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2004 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2005 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2006 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2007 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2008 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2009 } else { 2010 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2011 } 2012 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2013 2014 /* coarse basis functions */ 2015 for (i=0;i<n_vertices;i++) { 2016 PetscScalar *y; 2017 2018 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2019 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2020 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 y[n_B*i+idx_V_B[i]] = 1.0; 2024 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2025 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2026 2027 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2028 PetscInt j; 2029 2030 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2031 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2032 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2035 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2036 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2037 } 2038 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2039 } 2040 /* if n_R == 0 the object is not destroyed */ 2041 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2042 } 2043 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2044 2045 if (n_constraints) { 2046 Mat B; 2047 2048 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2049 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2050 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2051 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2052 if (n_vertices) { 2053 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2054 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2055 } else { 2056 Mat S_VCt; 2057 2058 if (lda_rhs != n_R) { 2059 ierr = MatDestroy(&B);CHKERRQ(ierr); 2060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2061 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2062 } 2063 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2064 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2065 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2066 } 2067 } 2068 ierr = MatDestroy(&B);CHKERRQ(ierr); 2069 /* coarse basis functions */ 2070 for (i=0;i<n_constraints;i++) { 2071 PetscScalar *y; 2072 2073 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2074 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2075 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2076 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2077 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2078 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2079 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2081 PetscInt j; 2082 2083 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2084 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2085 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2087 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2088 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2090 } 2091 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2092 } 2093 } 2094 if (n_constraints) { 2095 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2096 } 2097 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2098 2099 /* coarse matrix entries relative to B_0 */ 2100 if (pcbddc->benign_n) { 2101 Mat B0_B,B0_BPHI; 2102 IS is_dummy; 2103 PetscScalar *data; 2104 PetscInt j; 2105 2106 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2107 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2109 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2110 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2111 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2112 for (j=0;j<pcbddc->benign_n;j++) { 2113 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2114 for (i=0;i<pcbddc->local_primal_size;i++) { 2115 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2116 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2117 } 2118 } 2119 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2120 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2121 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2122 } 2123 2124 /* compute other basis functions for non-symmetric problems */ 2125 if (!pcbddc->symmetric_primal) { 2126 Mat B_V=NULL,B_C=NULL; 2127 PetscScalar *marray; 2128 2129 if (n_constraints) { 2130 Mat S_CCT,C_CRT; 2131 2132 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2133 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2134 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2135 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2136 if (n_vertices) { 2137 Mat S_VCT; 2138 2139 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2140 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2141 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2142 } 2143 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2144 } 2145 if (n_vertices && n_R) { 2146 PetscScalar *av,*marray; 2147 const PetscInt *xadj,*adjncy; 2148 PetscInt n; 2149 PetscBool flg_row; 2150 2151 /* B_V = B_V - A_VR^T */ 2152 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2153 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2154 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2155 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2156 for (i=0;i<n;i++) { 2157 PetscInt j; 2158 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2159 } 2160 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2161 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2162 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2163 } 2164 2165 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2166 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2167 for (i=0;i<n_vertices;i++) { 2168 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2169 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2170 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2171 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2172 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2173 } 2174 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2175 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2176 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2177 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2178 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2179 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2180 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2181 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2182 } 2183 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2184 /* coarse basis functions */ 2185 for (i=0;i<pcbddc->local_primal_size;i++) { 2186 PetscScalar *y; 2187 2188 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2189 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2190 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2191 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2192 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2193 if (i<n_vertices) { 2194 y[n_B*i+idx_V_B[i]] = 1.0; 2195 } 2196 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2197 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2198 2199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2200 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2201 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2202 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2203 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2204 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2205 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2206 } 2207 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2208 } 2209 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2210 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2211 } 2212 /* free memory */ 2213 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2214 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2215 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2216 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2217 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2218 ierr = PetscFree(work);CHKERRQ(ierr); 2219 if (n_vertices) { 2220 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2221 } 2222 if (n_constraints) { 2223 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2224 } 2225 /* Checking coarse_sub_mat and coarse basis functios */ 2226 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2227 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2228 if (pcbddc->dbg_flag) { 2229 Mat coarse_sub_mat; 2230 Mat AUXMAT,TM1,TM2,TM3,TM4; 2231 Mat coarse_phi_D,coarse_phi_B; 2232 Mat coarse_psi_D,coarse_psi_B; 2233 Mat A_II,A_BB,A_IB,A_BI; 2234 Mat C_B,CPHI; 2235 IS is_dummy; 2236 Vec mones; 2237 MatType checkmattype=MATSEQAIJ; 2238 PetscReal real_value; 2239 2240 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2241 Mat A; 2242 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2243 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2244 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2245 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2246 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2247 ierr = MatDestroy(&A);CHKERRQ(ierr); 2248 } else { 2249 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2250 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2251 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2252 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2253 } 2254 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2255 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2256 if (!pcbddc->symmetric_primal) { 2257 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2258 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2259 } 2260 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2261 2262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2265 if (!pcbddc->symmetric_primal) { 2266 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2267 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2268 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2269 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2270 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2271 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2272 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 } else { 2279 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2280 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2281 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2283 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2284 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2285 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2286 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2287 } 2288 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2289 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2290 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2291 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2292 if (pcbddc->benign_n) { 2293 Mat B0_B,B0_BPHI; 2294 PetscScalar *data,*data2; 2295 PetscInt j; 2296 2297 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2299 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2300 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2301 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2303 for (j=0;j<pcbddc->benign_n;j++) { 2304 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2305 for (i=0;i<pcbddc->local_primal_size;i++) { 2306 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2307 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2308 } 2309 } 2310 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2311 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2312 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2313 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2314 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2315 } 2316 #if 0 2317 { 2318 PetscViewer viewer; 2319 char filename[256]; 2320 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2321 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2322 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2323 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2324 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2325 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2326 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2327 if (save_change) { 2328 Mat phi_B; 2329 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2330 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2331 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2332 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2333 } else { 2334 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2335 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2336 } 2337 if (pcbddc->coarse_phi_D) { 2338 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2339 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2340 } 2341 if (pcbddc->coarse_psi_B) { 2342 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2343 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2344 } 2345 if (pcbddc->coarse_psi_D) { 2346 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2347 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2348 } 2349 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2350 } 2351 #endif 2352 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2353 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2354 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2356 2357 /* check constraints */ 2358 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2359 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 2360 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 2361 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2362 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2363 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2364 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2365 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2367 if (!pcbddc->symmetric_primal) { 2368 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2369 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2370 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2371 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2373 } 2374 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2375 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2376 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2377 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2378 } 2379 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2380 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2381 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2382 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2383 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2384 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2385 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2386 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2387 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2388 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2389 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2390 if (!pcbddc->symmetric_primal) { 2391 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2392 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2393 } 2394 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2395 } 2396 /* get back data */ 2397 *coarse_submat_vals_n = coarse_submat_vals; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2403 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2404 { 2405 Mat *work_mat; 2406 IS isrow_s,iscol_s; 2407 PetscBool rsorted,csorted; 2408 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2409 PetscErrorCode ierr; 2410 2411 PetscFunctionBegin; 2412 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2413 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2414 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2415 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2416 2417 if (!rsorted) { 2418 const PetscInt *idxs; 2419 PetscInt *idxs_sorted,i; 2420 2421 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2422 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2423 for (i=0;i<rsize;i++) { 2424 idxs_perm_r[i] = i; 2425 } 2426 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2427 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2428 for (i=0;i<rsize;i++) { 2429 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2430 } 2431 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2432 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2433 } else { 2434 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2435 isrow_s = isrow; 2436 } 2437 2438 if (!csorted) { 2439 if (isrow == iscol) { 2440 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2441 iscol_s = isrow_s; 2442 } else { 2443 const PetscInt *idxs; 2444 PetscInt *idxs_sorted,i; 2445 2446 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2447 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2448 for (i=0;i<csize;i++) { 2449 idxs_perm_c[i] = i; 2450 } 2451 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2452 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2453 for (i=0;i<csize;i++) { 2454 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2455 } 2456 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2457 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2458 } 2459 } else { 2460 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2461 iscol_s = iscol; 2462 } 2463 2464 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2465 2466 if (!rsorted || !csorted) { 2467 Mat new_mat; 2468 IS is_perm_r,is_perm_c; 2469 2470 if (!rsorted) { 2471 PetscInt *idxs_r,i; 2472 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2473 for (i=0;i<rsize;i++) { 2474 idxs_r[idxs_perm_r[i]] = i; 2475 } 2476 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2477 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2478 } else { 2479 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2480 } 2481 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2482 2483 if (!csorted) { 2484 if (isrow_s == iscol_s) { 2485 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2486 is_perm_c = is_perm_r; 2487 } else { 2488 PetscInt *idxs_c,i; 2489 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2490 for (i=0;i<csize;i++) { 2491 idxs_c[idxs_perm_c[i]] = i; 2492 } 2493 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2494 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2495 } 2496 } else { 2497 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2498 } 2499 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2500 2501 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2502 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2503 work_mat[0] = new_mat; 2504 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2505 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2506 } 2507 2508 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2509 *B = work_mat[0]; 2510 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2511 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2512 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2518 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2519 { 2520 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2521 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2522 Mat new_mat; 2523 IS is_local,is_global; 2524 PetscInt local_size; 2525 PetscBool isseqaij; 2526 PetscErrorCode ierr; 2527 2528 PetscFunctionBegin; 2529 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2530 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2531 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2532 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2533 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2534 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2535 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2536 2537 /* check */ 2538 if (pcbddc->dbg_flag) { 2539 Vec x,x_change; 2540 PetscReal error; 2541 2542 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2543 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2544 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2545 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2546 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2547 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2548 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2549 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2550 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2551 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2553 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2554 ierr = VecDestroy(&x);CHKERRQ(ierr); 2555 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2556 } 2557 2558 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2559 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2560 if (isseqaij) { 2561 Mat M; 2562 2563 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2564 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2565 ierr = MatDestroy(&M);CHKERRQ(ierr); 2566 } else { 2567 Mat work_mat,M; 2568 2569 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2570 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2571 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2572 ierr = MatDestroy(&M);CHKERRQ(ierr); 2573 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2574 } 2575 if (matis->A->symmetric_set) { 2576 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2577 #if !defined(PETSC_USE_COMPLEX) 2578 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2579 #endif 2580 } 2581 /* 2582 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2583 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2584 */ 2585 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2591 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2592 { 2593 PC_IS* pcis = (PC_IS*)(pc->data); 2594 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2595 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2596 PetscInt *idx_R_local=NULL; 2597 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2598 PetscInt vbs,bs; 2599 PetscBT bitmask=NULL; 2600 PetscErrorCode ierr; 2601 2602 PetscFunctionBegin; 2603 /* 2604 No need to setup local scatters if 2605 - primal space is unchanged 2606 AND 2607 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2608 AND 2609 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2610 */ 2611 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2612 PetscFunctionReturn(0); 2613 } 2614 /* destroy old objects */ 2615 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2616 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2617 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2618 /* Set Non-overlapping dimensions */ 2619 n_B = pcis->n_B; 2620 n_D = pcis->n - n_B; 2621 n_vertices = pcbddc->n_vertices; 2622 2623 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2624 2625 /* create auxiliary bitmask and allocate workspace */ 2626 if (!sub_schurs->reuse_solver) { 2627 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2628 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2629 for (i=0;i<n_vertices;i++) { 2630 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2631 } 2632 2633 for (i=0, n_R=0; i<pcis->n; i++) { 2634 if (!PetscBTLookup(bitmask,i)) { 2635 idx_R_local[n_R++] = i; 2636 } 2637 } 2638 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2639 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2640 2641 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2642 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2643 } 2644 2645 /* Block code */ 2646 vbs = 1; 2647 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2648 if (bs>1 && !(n_vertices%bs)) { 2649 PetscBool is_blocked = PETSC_TRUE; 2650 PetscInt *vary; 2651 if (!sub_schurs->reuse_solver) { 2652 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2653 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2654 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2655 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2656 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2657 for (i=0; i<pcis->n/bs; i++) { 2658 if (vary[i]!=0 && vary[i]!=bs) { 2659 is_blocked = PETSC_FALSE; 2660 break; 2661 } 2662 } 2663 ierr = PetscFree(vary);CHKERRQ(ierr); 2664 } else { 2665 /* Verify directly the R set */ 2666 for (i=0; i<n_R/bs; i++) { 2667 PetscInt j,node=idx_R_local[bs*i]; 2668 for (j=1; j<bs; j++) { 2669 if (node != idx_R_local[bs*i+j]-j) { 2670 is_blocked = PETSC_FALSE; 2671 break; 2672 } 2673 } 2674 } 2675 } 2676 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2677 vbs = bs; 2678 for (i=0;i<n_R/vbs;i++) { 2679 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2680 } 2681 } 2682 } 2683 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2684 if (sub_schurs->reuse_solver) { 2685 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2686 2687 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2688 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2689 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2690 reuse_solver->is_R = pcbddc->is_R_local; 2691 } else { 2692 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2693 } 2694 2695 /* print some info if requested */ 2696 if (pcbddc->dbg_flag) { 2697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2698 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2699 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2704 } 2705 2706 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2707 if (!sub_schurs->reuse_solver) { 2708 IS is_aux1,is_aux2; 2709 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2710 2711 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2712 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2713 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2714 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2715 for (i=0; i<n_D; i++) { 2716 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2717 } 2718 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2719 for (i=0, j=0; i<n_R; i++) { 2720 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2721 aux_array1[j++] = i; 2722 } 2723 } 2724 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2725 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2726 for (i=0, j=0; i<n_B; i++) { 2727 if (!PetscBTLookup(bitmask,is_indices[i])) { 2728 aux_array2[j++] = i; 2729 } 2730 } 2731 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2732 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2733 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2734 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2735 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2736 2737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2738 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2739 for (i=0, j=0; i<n_R; i++) { 2740 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2741 aux_array1[j++] = i; 2742 } 2743 } 2744 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2745 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2746 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2747 } 2748 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2749 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2750 } else { 2751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2752 IS tis; 2753 PetscInt schur_size; 2754 2755 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2756 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2757 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2758 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2760 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2761 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2762 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2763 } 2764 } 2765 PetscFunctionReturn(0); 2766 } 2767 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2771 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2772 { 2773 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2774 PC_IS *pcis = (PC_IS*)pc->data; 2775 PC pc_temp; 2776 Mat A_RR; 2777 MatReuse reuse; 2778 PetscScalar m_one = -1.0; 2779 PetscReal value; 2780 PetscInt n_D,n_R; 2781 PetscBool use_exact,use_exact_reduced,issbaij; 2782 PetscErrorCode ierr; 2783 /* prefixes stuff */ 2784 char dir_prefix[256],neu_prefix[256],str_level[16]; 2785 size_t len; 2786 2787 PetscFunctionBegin; 2788 2789 /* compute prefixes */ 2790 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2791 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2792 if (!pcbddc->current_level) { 2793 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2794 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2795 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2796 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2797 } else { 2798 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2799 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2800 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2801 len -= 15; /* remove "pc_bddc_coarse_" */ 2802 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2803 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2804 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2805 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2806 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2807 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2808 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2809 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2810 } 2811 2812 /* DIRICHLET PROBLEM */ 2813 if (dirichlet) { 2814 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2815 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2816 if (!sub_schurs->reuse_solver) { 2817 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2818 } 2819 if (pcbddc->dbg_flag) { 2820 Mat A_IIn; 2821 2822 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2823 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2824 pcis->A_II = A_IIn; 2825 } 2826 } 2827 if (pcbddc->local_mat->symmetric_set) { 2828 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2829 } 2830 /* Matrix for Dirichlet problem is pcis->A_II */ 2831 n_D = pcis->n - pcis->n_B; 2832 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2833 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2834 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2835 /* default */ 2836 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2837 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2838 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2839 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2840 if (issbaij) { 2841 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2842 } else { 2843 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2844 } 2845 /* Allow user's customization */ 2846 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2847 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2848 } 2849 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2850 if (sub_schurs->reuse_solver) { 2851 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2852 2853 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2854 } 2855 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2856 if (!n_D) { 2857 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2858 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2859 } 2860 /* Set Up KSP for Dirichlet problem of BDDC */ 2861 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2862 /* set ksp_D into pcis data */ 2863 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2864 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2865 pcis->ksp_D = pcbddc->ksp_D; 2866 } 2867 2868 /* NEUMANN PROBLEM */ 2869 A_RR = 0; 2870 if (neumann) { 2871 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2872 PetscInt ibs,mbs; 2873 PetscBool issbaij; 2874 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2875 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2876 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2877 if (pcbddc->ksp_R) { /* already created ksp */ 2878 PetscInt nn_R; 2879 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2880 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2881 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2882 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2883 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2884 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2885 reuse = MAT_INITIAL_MATRIX; 2886 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2887 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2888 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2889 reuse = MAT_INITIAL_MATRIX; 2890 } else { /* safe to reuse the matrix */ 2891 reuse = MAT_REUSE_MATRIX; 2892 } 2893 } 2894 /* last check */ 2895 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2896 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2897 reuse = MAT_INITIAL_MATRIX; 2898 } 2899 } else { /* first time, so we need to create the matrix */ 2900 reuse = MAT_INITIAL_MATRIX; 2901 } 2902 /* extract A_RR */ 2903 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2904 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2905 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2906 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2907 if (matis->A == pcbddc->local_mat) { 2908 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2909 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2910 } else { 2911 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2912 } 2913 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2914 if (matis->A == pcbddc->local_mat) { 2915 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2916 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2917 } else { 2918 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2919 } 2920 } 2921 if (pcbddc->benign_n && !pcbddc->benign_change_explicit && pcbddc->dbg_flag) { 2922 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2923 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2924 } else { 2925 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2926 } 2927 if (pcbddc->local_mat->symmetric_set) { 2928 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2929 } 2930 if (!pcbddc->ksp_R) { /* create object if not present */ 2931 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2932 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2933 /* default */ 2934 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2935 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2936 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2937 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2938 if (issbaij) { 2939 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2940 } else { 2941 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2942 } 2943 /* Allow user's customization */ 2944 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2945 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2946 } 2947 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2948 if (!n_R) { 2949 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2950 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2951 } 2952 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2953 /* Reuse solver if it is present */ 2954 if (sub_schurs->reuse_solver) { 2955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2956 2957 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2958 } 2959 /* Set Up KSP for Neumann problem of BDDC */ 2960 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2961 } 2962 2963 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2964 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2965 if (pcbddc->dbg_flag) { 2966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2967 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2969 } 2970 if (dirichlet) { /* Dirichlet */ 2971 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2972 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2973 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2974 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2975 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2976 /* need to be adapted? */ 2977 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2978 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2979 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2980 /* print info */ 2981 if (pcbddc->dbg_flag) { 2982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2984 } 2985 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2986 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2987 } 2988 } 2989 if (neumann) { /* Neumann */ 2990 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2991 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2992 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2993 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2994 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2995 /* need to be adapted? */ 2996 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2997 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2998 /* print info */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3005 } 3006 } 3007 } 3008 /* free Neumann problem's matrix */ 3009 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3015 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3016 { 3017 PetscErrorCode ierr; 3018 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3019 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3020 3021 PetscFunctionBegin; 3022 if (!sub_schurs->reuse_solver) { 3023 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3024 } 3025 if (!pcbddc->switch_static) { 3026 if (applytranspose && pcbddc->local_auxmat1) { 3027 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3028 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3029 } 3030 if (!sub_schurs->reuse_solver) { 3031 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3033 } else { 3034 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3035 3036 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3038 } 3039 } else { 3040 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3041 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3043 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3044 if (applytranspose && pcbddc->local_auxmat1) { 3045 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3047 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3049 } 3050 } 3051 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3052 if (applytranspose) { 3053 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3054 } else { 3055 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3056 } 3057 } else { 3058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3059 3060 if (applytranspose) { 3061 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3062 } else { 3063 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3064 } 3065 } 3066 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3067 if (!pcbddc->switch_static) { 3068 if (!sub_schurs->reuse_solver) { 3069 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3070 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3071 } else { 3072 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3073 3074 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3075 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3076 } 3077 if (!applytranspose && pcbddc->local_auxmat1) { 3078 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3079 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3080 } 3081 } else { 3082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 if (!applytranspose && pcbddc->local_auxmat1) { 3087 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3088 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3089 } 3090 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3091 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3092 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3101 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3102 { 3103 PetscErrorCode ierr; 3104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3105 PC_IS* pcis = (PC_IS*) (pc->data); 3106 const PetscScalar zero = 0.0; 3107 3108 PetscFunctionBegin; 3109 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3110 if (!pcbddc->benign_apply_coarse_only) { 3111 if (applytranspose) { 3112 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3113 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3114 } else { 3115 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3116 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3117 } 3118 } else { 3119 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3120 } 3121 3122 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3123 if (pcbddc->benign_n) { 3124 PetscScalar *array; 3125 PetscInt j; 3126 3127 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3128 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3129 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3130 } 3131 3132 /* start communications from local primal nodes to rhs of coarse solver */ 3133 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3134 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3135 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3136 3137 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3138 /* TODO remove null space when doing multilevel */ 3139 if (pcbddc->coarse_ksp) { 3140 Mat coarse_mat; 3141 Vec rhs,sol; 3142 MatNullSpace nullsp; 3143 3144 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3145 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3146 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3147 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3148 if (nullsp) { 3149 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3150 } 3151 if (applytranspose) { 3152 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3153 } else { 3154 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3155 } 3156 if (nullsp) { 3157 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3158 } 3159 } 3160 3161 /* Local solution on R nodes */ 3162 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3163 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3164 } 3165 /* communications from coarse sol to local primal nodes */ 3166 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3168 3169 /* Sum contributions from the two levels */ 3170 if (!pcbddc->benign_apply_coarse_only) { 3171 if (applytranspose) { 3172 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3173 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3174 } else { 3175 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3176 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3177 } 3178 /* store p0 */ 3179 if (pcbddc->benign_n) { 3180 PetscScalar *array; 3181 PetscInt j; 3182 3183 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3184 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3185 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3186 } 3187 } else { /* expand the coarse solution */ 3188 if (applytranspose) { 3189 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3190 } else { 3191 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3192 } 3193 } 3194 PetscFunctionReturn(0); 3195 } 3196 3197 #undef __FUNCT__ 3198 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3199 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3200 { 3201 PetscErrorCode ierr; 3202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3203 PetscScalar *array; 3204 Vec from,to; 3205 3206 PetscFunctionBegin; 3207 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3208 from = pcbddc->coarse_vec; 3209 to = pcbddc->vec1_P; 3210 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3211 Vec tvec; 3212 3213 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3214 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3215 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3216 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3217 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3219 } 3220 } else { /* from local to global -> put data in coarse right hand side */ 3221 from = pcbddc->vec1_P; 3222 to = pcbddc->coarse_vec; 3223 } 3224 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3230 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3231 { 3232 PetscErrorCode ierr; 3233 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3234 PetscScalar *array; 3235 Vec from,to; 3236 3237 PetscFunctionBegin; 3238 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3239 from = pcbddc->coarse_vec; 3240 to = pcbddc->vec1_P; 3241 } else { /* from local to global -> put data in coarse right hand side */ 3242 from = pcbddc->vec1_P; 3243 to = pcbddc->coarse_vec; 3244 } 3245 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3246 if (smode == SCATTER_FORWARD) { 3247 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3248 Vec tvec; 3249 3250 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3251 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3252 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3253 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3254 } 3255 } else { 3256 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3257 ierr = VecResetArray(from);CHKERRQ(ierr); 3258 } 3259 } 3260 PetscFunctionReturn(0); 3261 } 3262 3263 /* uncomment for testing purposes */ 3264 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3267 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3268 { 3269 PetscErrorCode ierr; 3270 PC_IS* pcis = (PC_IS*)(pc->data); 3271 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3272 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3273 /* one and zero */ 3274 PetscScalar one=1.0,zero=0.0; 3275 /* space to store constraints and their local indices */ 3276 PetscScalar *constraints_data; 3277 PetscInt *constraints_idxs,*constraints_idxs_B; 3278 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3279 PetscInt *constraints_n; 3280 /* iterators */ 3281 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3282 /* BLAS integers */ 3283 PetscBLASInt lwork,lierr; 3284 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3285 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3286 /* reuse */ 3287 PetscInt olocal_primal_size,olocal_primal_size_cc; 3288 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3289 /* change of basis */ 3290 PetscBool qr_needed; 3291 PetscBT change_basis,qr_needed_idx; 3292 /* auxiliary stuff */ 3293 PetscInt *nnz,*is_indices; 3294 PetscInt ncc; 3295 /* some quantities */ 3296 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3297 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3298 3299 PetscFunctionBegin; 3300 /* Destroy Mat objects computed previously */ 3301 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3302 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3303 /* save info on constraints from previous setup (if any) */ 3304 olocal_primal_size = pcbddc->local_primal_size; 3305 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3306 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3307 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3308 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3309 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3310 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3311 3312 /* print some info */ 3313 if (pcbddc->dbg_flag) { 3314 IS vertices; 3315 PetscInt nv,nedges,nfaces; 3316 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3317 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3318 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3319 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3325 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3326 } 3327 3328 if (!pcbddc->adaptive_selection) { 3329 IS ISForVertices,*ISForFaces,*ISForEdges; 3330 MatNullSpace nearnullsp; 3331 const Vec *nearnullvecs; 3332 Vec *localnearnullsp; 3333 PetscScalar *array; 3334 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3335 PetscBool nnsp_has_cnst; 3336 /* LAPACK working arrays for SVD or POD */ 3337 PetscBool skip_lapack,boolforchange; 3338 PetscScalar *work; 3339 PetscReal *singular_vals; 3340 #if defined(PETSC_USE_COMPLEX) 3341 PetscReal *rwork; 3342 #endif 3343 #if defined(PETSC_MISSING_LAPACK_GESVD) 3344 PetscScalar *temp_basis,*correlation_mat; 3345 #else 3346 PetscBLASInt dummy_int=1; 3347 PetscScalar dummy_scalar=1.; 3348 #endif 3349 3350 /* Get index sets for faces, edges and vertices from graph */ 3351 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3352 /* free unneeded index sets */ 3353 if (!pcbddc->use_vertices) { 3354 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3355 } 3356 if (!pcbddc->use_edges) { 3357 for (i=0;i<n_ISForEdges;i++) { 3358 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3359 } 3360 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3361 n_ISForEdges = 0; 3362 } 3363 if (!pcbddc->use_faces) { 3364 for (i=0;i<n_ISForFaces;i++) { 3365 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3366 } 3367 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3368 n_ISForFaces = 0; 3369 } 3370 3371 #if defined(PETSC_USE_DEBUG) 3372 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3373 Also use_change_of_basis should be consistent among processors */ 3374 if (pcbddc->NullSpace) { 3375 PetscBool tbool[2],gbool[2]; 3376 3377 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3378 pcbddc->use_change_of_basis = PETSC_TRUE; 3379 if (!ISForEdges) { 3380 pcbddc->use_change_on_faces = PETSC_TRUE; 3381 } 3382 } 3383 tbool[0] = pcbddc->use_change_of_basis; 3384 tbool[1] = pcbddc->use_change_on_faces; 3385 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3386 pcbddc->use_change_of_basis = gbool[0]; 3387 pcbddc->use_change_on_faces = gbool[1]; 3388 } 3389 #endif 3390 3391 /* check if near null space is attached to global mat */ 3392 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3393 if (nearnullsp) { 3394 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3395 /* remove any stored info */ 3396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3398 /* store information for BDDC solver reuse */ 3399 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3400 pcbddc->onearnullspace = nearnullsp; 3401 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3402 for (i=0;i<nnsp_size;i++) { 3403 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3404 } 3405 } else { /* if near null space is not provided BDDC uses constants by default */ 3406 nnsp_size = 0; 3407 nnsp_has_cnst = PETSC_TRUE; 3408 } 3409 /* get max number of constraints on a single cc */ 3410 max_constraints = nnsp_size; 3411 if (nnsp_has_cnst) max_constraints++; 3412 3413 /* 3414 Evaluate maximum storage size needed by the procedure 3415 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3416 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3417 There can be multiple constraints per connected component 3418 */ 3419 n_vertices = 0; 3420 if (ISForVertices) { 3421 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3422 } 3423 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3424 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3425 3426 total_counts = n_ISForFaces+n_ISForEdges; 3427 total_counts *= max_constraints; 3428 total_counts += n_vertices; 3429 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3430 3431 total_counts = 0; 3432 max_size_of_constraint = 0; 3433 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3434 IS used_is; 3435 if (i<n_ISForEdges) { 3436 used_is = ISForEdges[i]; 3437 } else { 3438 used_is = ISForFaces[i-n_ISForEdges]; 3439 } 3440 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3441 total_counts += j; 3442 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3443 } 3444 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3445 3446 /* get local part of global near null space vectors */ 3447 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3448 for (k=0;k<nnsp_size;k++) { 3449 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3450 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3451 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3452 } 3453 3454 /* whether or not to skip lapack calls */ 3455 skip_lapack = PETSC_TRUE; 3456 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3457 3458 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3459 if (!skip_lapack) { 3460 PetscScalar temp_work; 3461 3462 #if defined(PETSC_MISSING_LAPACK_GESVD) 3463 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3464 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3465 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3466 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3467 #if defined(PETSC_USE_COMPLEX) 3468 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3469 #endif 3470 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3471 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3472 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3473 lwork = -1; 3474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3475 #if !defined(PETSC_USE_COMPLEX) 3476 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3477 #else 3478 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3479 #endif 3480 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3481 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3482 #else /* on missing GESVD */ 3483 /* SVD */ 3484 PetscInt max_n,min_n; 3485 max_n = max_size_of_constraint; 3486 min_n = max_constraints; 3487 if (max_size_of_constraint < max_constraints) { 3488 min_n = max_size_of_constraint; 3489 max_n = max_constraints; 3490 } 3491 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3492 #if defined(PETSC_USE_COMPLEX) 3493 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3494 #endif 3495 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3496 lwork = -1; 3497 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3499 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3500 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3501 #if !defined(PETSC_USE_COMPLEX) 3502 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3503 #else 3504 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3505 #endif 3506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3507 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3508 #endif /* on missing GESVD */ 3509 /* Allocate optimal workspace */ 3510 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3511 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3512 } 3513 /* Now we can loop on constraining sets */ 3514 total_counts = 0; 3515 constraints_idxs_ptr[0] = 0; 3516 constraints_data_ptr[0] = 0; 3517 /* vertices */ 3518 if (n_vertices) { 3519 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3520 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3521 for (i=0;i<n_vertices;i++) { 3522 constraints_n[total_counts] = 1; 3523 constraints_data[total_counts] = 1.0; 3524 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3525 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3526 total_counts++; 3527 } 3528 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3529 n_vertices = total_counts; 3530 } 3531 3532 /* edges and faces */ 3533 total_counts_cc = total_counts; 3534 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3535 IS used_is; 3536 PetscBool idxs_copied = PETSC_FALSE; 3537 3538 if (ncc<n_ISForEdges) { 3539 used_is = ISForEdges[ncc]; 3540 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3541 } else { 3542 used_is = ISForFaces[ncc-n_ISForEdges]; 3543 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3544 } 3545 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3546 3547 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3548 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3549 /* change of basis should not be performed on local periodic nodes */ 3550 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3551 if (nnsp_has_cnst) { 3552 PetscScalar quad_value; 3553 3554 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3555 idxs_copied = PETSC_TRUE; 3556 3557 if (!pcbddc->use_nnsp_true) { 3558 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3559 } else { 3560 quad_value = 1.0; 3561 } 3562 for (j=0;j<size_of_constraint;j++) { 3563 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3564 } 3565 temp_constraints++; 3566 total_counts++; 3567 } 3568 for (k=0;k<nnsp_size;k++) { 3569 PetscReal real_value; 3570 PetscScalar *ptr_to_data; 3571 3572 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3573 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3574 for (j=0;j<size_of_constraint;j++) { 3575 ptr_to_data[j] = array[is_indices[j]]; 3576 } 3577 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3578 /* check if array is null on the connected component */ 3579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3580 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3581 if (real_value > 0.0) { /* keep indices and values */ 3582 temp_constraints++; 3583 total_counts++; 3584 if (!idxs_copied) { 3585 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3586 idxs_copied = PETSC_TRUE; 3587 } 3588 } 3589 } 3590 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3591 valid_constraints = temp_constraints; 3592 if (!pcbddc->use_nnsp_true && temp_constraints) { 3593 if (temp_constraints == 1) { /* just normalize the constraint */ 3594 PetscScalar norm,*ptr_to_data; 3595 3596 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3597 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3598 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3599 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3600 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3601 } else { /* perform SVD */ 3602 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3603 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3604 3605 #if defined(PETSC_MISSING_LAPACK_GESVD) 3606 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3607 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3608 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3609 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3610 from that computed using LAPACKgesvd 3611 -> This is due to a different computation of eigenvectors in LAPACKheev 3612 -> The quality of the POD-computed basis will be the same */ 3613 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3614 /* Store upper triangular part of correlation matrix */ 3615 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3616 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3617 for (j=0;j<temp_constraints;j++) { 3618 for (k=0;k<j+1;k++) { 3619 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3620 } 3621 } 3622 /* compute eigenvalues and eigenvectors of correlation matrix */ 3623 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3624 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3625 #if !defined(PETSC_USE_COMPLEX) 3626 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3627 #else 3628 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3629 #endif 3630 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3631 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3632 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3633 j = 0; 3634 while (j < temp_constraints && singular_vals[j] < tol) j++; 3635 total_counts = total_counts-j; 3636 valid_constraints = temp_constraints-j; 3637 /* scale and copy POD basis into used quadrature memory */ 3638 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3641 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3642 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3643 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3644 if (j<temp_constraints) { 3645 PetscInt ii; 3646 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3647 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3648 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3649 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3650 for (k=0;k<temp_constraints-j;k++) { 3651 for (ii=0;ii<size_of_constraint;ii++) { 3652 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3653 } 3654 } 3655 } 3656 #else /* on missing GESVD */ 3657 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3660 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3661 #if !defined(PETSC_USE_COMPLEX) 3662 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3663 #else 3664 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3665 #endif 3666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3667 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3668 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3669 k = temp_constraints; 3670 if (k > size_of_constraint) k = size_of_constraint; 3671 j = 0; 3672 while (j < k && singular_vals[k-j-1] < tol) j++; 3673 valid_constraints = k-j; 3674 total_counts = total_counts-temp_constraints+valid_constraints; 3675 #endif /* on missing GESVD */ 3676 } 3677 } 3678 /* update pointers information */ 3679 if (valid_constraints) { 3680 constraints_n[total_counts_cc] = valid_constraints; 3681 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3682 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3683 /* set change_of_basis flag */ 3684 if (boolforchange) { 3685 PetscBTSet(change_basis,total_counts_cc); 3686 } 3687 total_counts_cc++; 3688 } 3689 } 3690 /* free workspace */ 3691 if (!skip_lapack) { 3692 ierr = PetscFree(work);CHKERRQ(ierr); 3693 #if defined(PETSC_USE_COMPLEX) 3694 ierr = PetscFree(rwork);CHKERRQ(ierr); 3695 #endif 3696 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3697 #if defined(PETSC_MISSING_LAPACK_GESVD) 3698 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3699 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3700 #endif 3701 } 3702 for (k=0;k<nnsp_size;k++) { 3703 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3704 } 3705 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3706 /* free index sets of faces, edges and vertices */ 3707 for (i=0;i<n_ISForFaces;i++) { 3708 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3709 } 3710 if (n_ISForFaces) { 3711 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3712 } 3713 for (i=0;i<n_ISForEdges;i++) { 3714 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3715 } 3716 if (n_ISForEdges) { 3717 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3718 } 3719 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3720 } else { 3721 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3722 3723 total_counts = 0; 3724 n_vertices = 0; 3725 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3726 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3727 } 3728 max_constraints = 0; 3729 total_counts_cc = 0; 3730 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3731 total_counts += pcbddc->adaptive_constraints_n[i]; 3732 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3733 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3734 } 3735 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3736 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3737 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3738 constraints_data = pcbddc->adaptive_constraints_data; 3739 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3740 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3741 total_counts_cc = 0; 3742 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3743 if (pcbddc->adaptive_constraints_n[i]) { 3744 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3745 } 3746 } 3747 #if 0 3748 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3749 for (i=0;i<total_counts_cc;i++) { 3750 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3751 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3752 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3753 printf(" %d",constraints_idxs[j]); 3754 } 3755 printf("\n"); 3756 printf("number of cc: %d\n",constraints_n[i]); 3757 } 3758 for (i=0;i<n_vertices;i++) { 3759 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3760 } 3761 for (i=0;i<sub_schurs->n_subs;i++) { 3762 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3763 } 3764 #endif 3765 3766 max_size_of_constraint = 0; 3767 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3768 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3769 /* Change of basis */ 3770 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3771 if (pcbddc->use_change_of_basis) { 3772 for (i=0;i<sub_schurs->n_subs;i++) { 3773 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3774 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3775 } 3776 } 3777 } 3778 } 3779 pcbddc->local_primal_size = total_counts; 3780 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3781 3782 /* map constraints_idxs in boundary numbering */ 3783 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3784 if (i != constraints_idxs_ptr[total_counts_cc]) { 3785 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3786 } 3787 3788 /* Create constraint matrix */ 3789 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3790 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3791 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3792 3793 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3794 /* determine if a QR strategy is needed for change of basis */ 3795 qr_needed = PETSC_FALSE; 3796 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3797 total_primal_vertices=0; 3798 pcbddc->local_primal_size_cc = 0; 3799 for (i=0;i<total_counts_cc;i++) { 3800 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3801 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3802 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3803 pcbddc->local_primal_size_cc += 1; 3804 } else if (PetscBTLookup(change_basis,i)) { 3805 for (k=0;k<constraints_n[i];k++) { 3806 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3807 } 3808 pcbddc->local_primal_size_cc += constraints_n[i]; 3809 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3810 PetscBTSet(qr_needed_idx,i); 3811 qr_needed = PETSC_TRUE; 3812 } 3813 } else { 3814 pcbddc->local_primal_size_cc += 1; 3815 } 3816 } 3817 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3818 pcbddc->n_vertices = total_primal_vertices; 3819 /* permute indices in order to have a sorted set of vertices */ 3820 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3821 3822 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3823 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3824 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3825 3826 /* nonzero structure of constraint matrix */ 3827 /* and get reference dof for local constraints */ 3828 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3829 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3830 3831 j = total_primal_vertices; 3832 total_counts = total_primal_vertices; 3833 cum = total_primal_vertices; 3834 for (i=n_vertices;i<total_counts_cc;i++) { 3835 if (!PetscBTLookup(change_basis,i)) { 3836 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3837 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3838 cum++; 3839 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3840 for (k=0;k<constraints_n[i];k++) { 3841 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3842 nnz[j+k] = size_of_constraint; 3843 } 3844 j += constraints_n[i]; 3845 } 3846 } 3847 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3848 ierr = PetscFree(nnz);CHKERRQ(ierr); 3849 3850 /* set values in constraint matrix */ 3851 for (i=0;i<total_primal_vertices;i++) { 3852 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3853 } 3854 total_counts = total_primal_vertices; 3855 for (i=n_vertices;i<total_counts_cc;i++) { 3856 if (!PetscBTLookup(change_basis,i)) { 3857 PetscInt *cols; 3858 3859 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3860 cols = constraints_idxs+constraints_idxs_ptr[i]; 3861 for (k=0;k<constraints_n[i];k++) { 3862 PetscInt row = total_counts+k; 3863 PetscScalar *vals; 3864 3865 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3866 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3867 } 3868 total_counts += constraints_n[i]; 3869 } 3870 } 3871 /* assembling */ 3872 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 3875 /* 3876 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3877 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3878 */ 3879 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3880 if (pcbddc->use_change_of_basis) { 3881 /* dual and primal dofs on a single cc */ 3882 PetscInt dual_dofs,primal_dofs; 3883 /* working stuff for GEQRF */ 3884 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3885 PetscBLASInt lqr_work; 3886 /* working stuff for UNGQR */ 3887 PetscScalar *gqr_work,lgqr_work_t; 3888 PetscBLASInt lgqr_work; 3889 /* working stuff for TRTRS */ 3890 PetscScalar *trs_rhs; 3891 PetscBLASInt Blas_NRHS; 3892 /* pointers for values insertion into change of basis matrix */ 3893 PetscInt *start_rows,*start_cols; 3894 PetscScalar *start_vals; 3895 /* working stuff for values insertion */ 3896 PetscBT is_primal; 3897 PetscInt *aux_primal_numbering_B; 3898 /* matrix sizes */ 3899 PetscInt global_size,local_size; 3900 /* temporary change of basis */ 3901 Mat localChangeOfBasisMatrix; 3902 /* extra space for debugging */ 3903 PetscScalar *dbg_work; 3904 3905 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3906 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3907 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3908 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3909 /* nonzeros for local mat */ 3910 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3911 for (i=0;i<pcis->n;i++) nnz[i]=1; 3912 for (i=n_vertices;i<total_counts_cc;i++) { 3913 if (PetscBTLookup(change_basis,i)) { 3914 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3915 if (PetscBTLookup(qr_needed_idx,i)) { 3916 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3917 } else { 3918 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3919 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3920 } 3921 } 3922 } 3923 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3924 ierr = PetscFree(nnz);CHKERRQ(ierr); 3925 /* Set initial identity in the matrix */ 3926 for (i=0;i<pcis->n;i++) { 3927 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3928 } 3929 3930 if (pcbddc->dbg_flag) { 3931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3933 } 3934 3935 3936 /* Now we loop on the constraints which need a change of basis */ 3937 /* 3938 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3939 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3940 3941 Basic blocks of change of basis matrix T computed by 3942 3943 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3944 3945 | 1 0 ... 0 s_1/S | 3946 | 0 1 ... 0 s_2/S | 3947 | ... | 3948 | 0 ... 1 s_{n-1}/S | 3949 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3950 3951 with S = \sum_{i=1}^n s_i^2 3952 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3953 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3954 3955 - QR decomposition of constraints otherwise 3956 */ 3957 if (qr_needed) { 3958 /* space to store Q */ 3959 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3960 /* first we issue queries for optimal work */ 3961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3963 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3964 lqr_work = -1; 3965 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3967 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3968 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3969 lgqr_work = -1; 3970 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3973 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3974 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3975 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3976 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3977 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3978 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3979 /* array to store scaling factors for reflectors */ 3980 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3981 /* array to store rhs and solution of triangular solver */ 3982 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3983 /* allocating workspace for check */ 3984 if (pcbddc->dbg_flag) { 3985 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3986 } 3987 } 3988 /* array to store whether a node is primal or not */ 3989 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3990 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3991 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3992 if (i != total_primal_vertices) { 3993 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3994 } 3995 for (i=0;i<total_primal_vertices;i++) { 3996 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3997 } 3998 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3999 4000 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4001 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4002 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4003 if (PetscBTLookup(change_basis,total_counts)) { 4004 /* get constraint info */ 4005 primal_dofs = constraints_n[total_counts]; 4006 dual_dofs = size_of_constraint-primal_dofs; 4007 4008 if (pcbddc->dbg_flag) { 4009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4010 } 4011 4012 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4013 4014 /* copy quadrature constraints for change of basis check */ 4015 if (pcbddc->dbg_flag) { 4016 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4017 } 4018 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4019 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4020 4021 /* compute QR decomposition of constraints */ 4022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4023 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4025 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4026 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4027 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4028 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4029 4030 /* explictly compute R^-T */ 4031 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4032 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4033 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4034 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4036 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4037 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4038 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4039 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4040 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4041 4042 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4045 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4047 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4048 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4049 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4050 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4051 4052 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4053 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4054 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4056 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4057 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4062 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4064 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4065 4066 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4067 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4068 /* insert cols for primal dofs */ 4069 for (j=0;j<primal_dofs;j++) { 4070 start_vals = &qr_basis[j*size_of_constraint]; 4071 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4072 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4073 } 4074 /* insert cols for dual dofs */ 4075 for (j=0,k=0;j<dual_dofs;k++) { 4076 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4077 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4078 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4079 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4080 j++; 4081 } 4082 } 4083 4084 /* check change of basis */ 4085 if (pcbddc->dbg_flag) { 4086 PetscInt ii,jj; 4087 PetscBool valid_qr=PETSC_TRUE; 4088 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4095 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4097 for (jj=0;jj<size_of_constraint;jj++) { 4098 for (ii=0;ii<primal_dofs;ii++) { 4099 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4100 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4101 } 4102 } 4103 if (!valid_qr) { 4104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4105 for (jj=0;jj<size_of_constraint;jj++) { 4106 for (ii=0;ii<primal_dofs;ii++) { 4107 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4108 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4109 } 4110 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4111 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4112 } 4113 } 4114 } 4115 } else { 4116 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4117 } 4118 } 4119 } else { /* simple transformation block */ 4120 PetscInt row,col; 4121 PetscScalar val,norm; 4122 4123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4124 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4125 for (j=0;j<size_of_constraint;j++) { 4126 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4127 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4128 if (!PetscBTLookup(is_primal,row_B)) { 4129 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4130 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4131 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4132 } else { 4133 for (k=0;k<size_of_constraint;k++) { 4134 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4135 if (row != col) { 4136 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4137 } else { 4138 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4139 } 4140 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4141 } 4142 } 4143 } 4144 if (pcbddc->dbg_flag) { 4145 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4146 } 4147 } 4148 } else { 4149 if (pcbddc->dbg_flag) { 4150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4151 } 4152 } 4153 } 4154 4155 /* free workspace */ 4156 if (qr_needed) { 4157 if (pcbddc->dbg_flag) { 4158 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4159 } 4160 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4161 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4162 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4163 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4164 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4165 } 4166 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4167 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4168 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4169 4170 /* assembling of global change of variable */ 4171 if (!pcbddc->fake_change) { 4172 Mat tmat; 4173 PetscInt bs; 4174 4175 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4176 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4177 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4178 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4179 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4180 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4181 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4182 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4183 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4184 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4185 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4186 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4187 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4188 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4189 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4190 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4191 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4192 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4193 4194 /* check */ 4195 if (pcbddc->dbg_flag) { 4196 PetscReal error; 4197 Vec x,x_change; 4198 4199 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4200 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4201 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4202 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4203 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4204 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4205 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4206 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4207 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4208 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4209 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4210 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4213 ierr = VecDestroy(&x);CHKERRQ(ierr); 4214 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4215 } 4216 /* adapt sub_schurs computed (if any) */ 4217 if (pcbddc->use_deluxe_scaling) { 4218 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4219 4220 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4221 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4222 } 4223 if (sub_schurs->S_Ej_all) { 4224 Mat S_new,tmat; 4225 IS is_all_N,is_V_Sall = NULL; 4226 4227 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4228 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4229 if (pcbddc->deluxe_zerorows) { 4230 ISLocalToGlobalMapping NtoSall; 4231 IS is_V; 4232 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4233 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4234 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4235 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4236 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4237 } 4238 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4239 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4240 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4241 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4242 if (pcbddc->deluxe_zerorows) { 4243 const PetscScalar *array; 4244 const PetscInt *idxs_V,*idxs_all; 4245 PetscInt i,n_V; 4246 4247 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4248 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4249 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4250 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4251 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4252 for (i=0;i<n_V;i++) { 4253 PetscScalar val; 4254 PetscInt idx; 4255 4256 idx = idxs_V[i]; 4257 val = array[idxs_all[idxs_V[i]]]; 4258 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4259 } 4260 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4261 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4262 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4263 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4264 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4265 } 4266 sub_schurs->S_Ej_all = S_new; 4267 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4268 if (sub_schurs->sum_S_Ej_all) { 4269 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4270 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4271 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4272 if (pcbddc->deluxe_zerorows) { 4273 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4274 } 4275 sub_schurs->sum_S_Ej_all = S_new; 4276 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4277 } 4278 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4279 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4280 } 4281 /* destroy any change of basis context in sub_schurs */ 4282 if (sub_schurs->change) { 4283 PetscInt i; 4284 4285 for (i=0;i<sub_schurs->n_subs;i++) { 4286 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4287 } 4288 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4289 } 4290 } 4291 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4292 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4293 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4294 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4295 pcbddc->use_qr_single = qr_needed; 4296 } 4297 } else if (pcbddc->user_ChangeOfBasisMatrix) { 4298 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4299 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4300 } 4301 4302 /* set up change of basis context */ 4303 if (pcbddc->ChangeOfBasisMatrix) { 4304 PCBDDCChange_ctx change_ctx; 4305 4306 if (!pcbddc->new_global_mat) { 4307 PetscInt global_size,local_size; 4308 4309 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4310 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4311 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 4312 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4313 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 4314 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 4315 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 4316 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 4317 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 4318 } else { 4319 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 4320 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 4321 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 4322 } 4323 if (!pcbddc->user_ChangeOfBasisMatrix) { 4324 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4325 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 4326 } else { 4327 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4328 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 4329 } 4330 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 4331 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 4332 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4334 } else { 4335 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 4336 } 4337 4338 if (!pcbddc->fake_change) { 4339 /* add pressure dofs to set of primal nodes for numbering purposes */ 4340 for (i=0;i<pcbddc->benign_n;i++) { 4341 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4342 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4343 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4344 pcbddc->local_primal_size_cc++; 4345 pcbddc->local_primal_size++; 4346 } 4347 4348 /* check if a new primal space has been introduced (also take into account benign trick) */ 4349 pcbddc->new_primal_space_local = PETSC_TRUE; 4350 if (olocal_primal_size == pcbddc->local_primal_size) { 4351 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4352 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4353 if (!pcbddc->new_primal_space_local) { 4354 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4355 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4356 } 4357 } 4358 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4359 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4360 } 4361 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4362 4363 /* flush dbg viewer */ 4364 if (pcbddc->dbg_flag) { 4365 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4366 } 4367 4368 /* free workspace */ 4369 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4370 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4371 if (!pcbddc->adaptive_selection) { 4372 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4373 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4374 } else { 4375 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4376 pcbddc->adaptive_constraints_idxs_ptr, 4377 pcbddc->adaptive_constraints_data_ptr, 4378 pcbddc->adaptive_constraints_idxs, 4379 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4380 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4381 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4382 } 4383 PetscFunctionReturn(0); 4384 } 4385 4386 #undef __FUNCT__ 4387 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4388 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4389 { 4390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4391 PC_IS *pcis = (PC_IS*)pc->data; 4392 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4393 PetscInt ierr,i,N; 4394 PetscViewer viewer=pcbddc->dbg_viewer; 4395 4396 PetscFunctionBegin; 4397 /* Reset previously computed graph */ 4398 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4399 /* Init local Graph struct */ 4400 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4401 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4402 4403 /* Check validity of the csr graph passed in by the user */ 4404 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4405 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4406 } 4407 4408 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4409 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4410 PetscInt *xadj,*adjncy; 4411 PetscInt nvtxs; 4412 PetscBool flg_row=PETSC_FALSE; 4413 4414 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4415 if (flg_row) { 4416 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4417 pcbddc->computed_rowadj = PETSC_TRUE; 4418 } 4419 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4420 } 4421 if (pcbddc->dbg_flag) { 4422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4423 } 4424 4425 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4426 if (pcbddc->user_provided_isfordofs) { 4427 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4428 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4429 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4430 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4431 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4432 } 4433 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4434 pcbddc->n_ISForDofs = 0; 4435 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4436 } 4437 } else { 4438 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4439 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4440 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4441 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4442 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4443 } 4444 } 4445 } 4446 4447 /* Setup of Graph */ 4448 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4449 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4450 } 4451 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4452 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4453 } 4454 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4455 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4456 } 4457 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4458 4459 /* attach info on disconnected subdomains if present */ 4460 if (pcbddc->n_local_subs) { 4461 PetscInt *local_subs; 4462 4463 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4464 for (i=0;i<pcbddc->n_local_subs;i++) { 4465 const PetscInt *idxs; 4466 PetscInt nl,j; 4467 4468 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4469 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4470 for (j=0;j<nl;j++) { 4471 local_subs[idxs[j]] = i; 4472 } 4473 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4474 } 4475 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4476 pcbddc->mat_graph->local_subs = local_subs; 4477 } 4478 4479 /* Graph's connected components analysis */ 4480 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4481 4482 /* print some info to stdout */ 4483 if (pcbddc->dbg_flag) { 4484 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4485 } 4486 PetscFunctionReturn(0); 4487 } 4488 4489 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4490 #undef __FUNCT__ 4491 #define __FUNCT__ "PCBDDCSubsetNumbering" 4492 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4493 { 4494 PetscSF sf; 4495 PetscLayout map; 4496 const PetscInt *idxs; 4497 PetscInt *leaf_data,*root_data,*gidxs; 4498 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4499 PetscInt n_n,nlocals,start,first_index; 4500 PetscMPIInt commsize; 4501 PetscBool first_found; 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4506 if (subset_mult) { 4507 PetscCheckSameComm(subset,1,subset_mult,2); 4508 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4509 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4510 } 4511 /* create workspace layout for computing global indices of subset */ 4512 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4513 lbounds[0] = lbounds[1] = 0; 4514 for (i=0;i<n;i++) { 4515 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4516 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4517 } 4518 lbounds[0] = -lbounds[0]; 4519 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4520 gbounds[0] = -gbounds[0]; 4521 N = gbounds[1] - gbounds[0] + 1; 4522 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4523 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4524 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4525 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4526 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4527 4528 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4529 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4530 if (subset_mult) { 4531 const PetscInt* idxs_mult; 4532 4533 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4534 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4535 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4536 } else { 4537 for (i=0;i<n;i++) leaf_data[i] = 1; 4538 } 4539 /* local size of new subset */ 4540 n_n = 0; 4541 for (i=0;i<n;i++) n_n += leaf_data[i]; 4542 4543 /* global indexes in layout */ 4544 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4545 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4546 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4547 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4548 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4549 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4550 4551 /* reduce from leaves to roots */ 4552 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4553 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4554 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4555 4556 /* count indexes in local part of layout */ 4557 nlocals = 0; 4558 first_index = -1; 4559 first_found = PETSC_FALSE; 4560 for (i=0;i<Nl;i++) { 4561 if (!first_found && root_data[i]) { 4562 first_found = PETSC_TRUE; 4563 first_index = i; 4564 } 4565 nlocals += root_data[i]; 4566 } 4567 4568 /* cumulative of number of indexes and size of subset without holes */ 4569 #if defined(PETSC_HAVE_MPI_EXSCAN) 4570 start = 0; 4571 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4572 #else 4573 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4574 start = start-nlocals; 4575 #endif 4576 4577 if (N_n) { /* compute total size of new subset if requested */ 4578 *N_n = start + nlocals; 4579 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4580 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4581 } 4582 4583 /* adapt root data with cumulative */ 4584 if (first_found) { 4585 PetscInt old_index; 4586 4587 root_data[first_index] += start; 4588 old_index = first_index; 4589 for (i=first_index+1;i<Nl;i++) { 4590 if (root_data[i]) { 4591 root_data[i] += root_data[old_index]; 4592 old_index = i; 4593 } 4594 } 4595 } 4596 4597 /* from roots to leaves */ 4598 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4599 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4600 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4601 4602 /* create new IS with global indexes without holes */ 4603 if (subset_mult) { 4604 const PetscInt* idxs_mult; 4605 PetscInt cum; 4606 4607 cum = 0; 4608 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4609 for (i=0;i<n;i++) { 4610 PetscInt j; 4611 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4612 } 4613 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4614 } else { 4615 for (i=0;i<n;i++) { 4616 gidxs[i] = leaf_data[i]-1; 4617 } 4618 } 4619 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4620 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4621 PetscFunctionReturn(0); 4622 } 4623 4624 #undef __FUNCT__ 4625 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4626 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4627 { 4628 PetscInt i,j; 4629 PetscScalar *alphas; 4630 PetscErrorCode ierr; 4631 4632 PetscFunctionBegin; 4633 /* this implements stabilized Gram-Schmidt */ 4634 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4635 for (i=0;i<n;i++) { 4636 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4637 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4638 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4639 } 4640 ierr = PetscFree(alphas);CHKERRQ(ierr); 4641 PetscFunctionReturn(0); 4642 } 4643 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "MatISGetSubassemblingPattern" 4646 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4647 { 4648 IS ranks_send_to; 4649 PetscInt n_neighs,*neighs,*n_shared,**shared; 4650 PetscMPIInt size,rank,color; 4651 PetscInt *xadj,*adjncy; 4652 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4653 PetscInt i,local_size,threshold=0; 4654 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4655 PetscSubcomm subcomm; 4656 PetscErrorCode ierr; 4657 4658 PetscFunctionBegin; 4659 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4660 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4661 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4662 4663 /* Get info on mapping */ 4664 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4665 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4666 4667 /* build local CSR graph of subdomains' connectivity */ 4668 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4669 xadj[0] = 0; 4670 xadj[1] = PetscMax(n_neighs-1,0); 4671 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4673 4674 if (threshold) { 4675 PetscInt xadj_count = 0; 4676 for (i=1;i<n_neighs;i++) { 4677 if (n_shared[i] > threshold) { 4678 adjncy[xadj_count] = neighs[i]; 4679 adjncy_wgt[xadj_count] = n_shared[i]; 4680 xadj_count++; 4681 } 4682 } 4683 xadj[1] = xadj_count; 4684 } else { 4685 if (xadj[1]) { 4686 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4687 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4688 } 4689 } 4690 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4691 if (use_square) { 4692 for (i=0;i<xadj[1];i++) { 4693 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4694 } 4695 } 4696 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4697 4698 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4699 4700 /* 4701 Restrict work on active processes only. 4702 */ 4703 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4704 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4705 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4706 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4707 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4708 if (color) { 4709 ierr = PetscFree(xadj);CHKERRQ(ierr); 4710 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4711 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4712 } else { 4713 Mat subdomain_adj; 4714 IS new_ranks,new_ranks_contig; 4715 MatPartitioning partitioner; 4716 PetscInt prank,rstart=0,rend=0; 4717 PetscInt *is_indices,*oldranks; 4718 PetscBool aggregate; 4719 4720 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4721 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4722 prank = rank; 4723 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4724 /* 4725 for (i=0;i<size;i++) { 4726 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4727 } 4728 */ 4729 for (i=0;i<xadj[1];i++) { 4730 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4731 } 4732 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4733 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4734 if (aggregate) { 4735 PetscInt lrows,row,ncols,*cols; 4736 PetscMPIInt nrank; 4737 PetscScalar *vals; 4738 4739 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4740 lrows = 0; 4741 if (nrank<redprocs) { 4742 lrows = size/redprocs; 4743 if (nrank<size%redprocs) lrows++; 4744 } 4745 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4746 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4747 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4748 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4749 row = nrank; 4750 ncols = xadj[1]-xadj[0]; 4751 cols = adjncy; 4752 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4753 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4754 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4755 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4756 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4757 ierr = PetscFree(xadj);CHKERRQ(ierr); 4758 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4759 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4760 ierr = PetscFree(vals);CHKERRQ(ierr); 4761 } else { 4762 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4763 } 4764 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4765 4766 /* Partition */ 4767 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4768 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4769 if (use_vwgt) { 4770 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4771 v_wgt[0] = local_size; 4772 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4773 } 4774 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4775 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4776 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4777 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4778 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4779 4780 /* renumber new_ranks to avoid "holes" in new set of processors */ 4781 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4782 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4783 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4784 if (!redprocs) { 4785 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4786 } else { 4787 PetscInt idxs[1]; 4788 PetscMPIInt tag; 4789 MPI_Request *reqs; 4790 4791 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4793 for (i=rstart;i<rend;i++) { 4794 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4795 } 4796 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4797 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4798 ierr = PetscFree(reqs);CHKERRQ(ierr); 4799 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4800 } 4801 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4802 /* clean up */ 4803 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4804 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4805 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4806 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4807 } 4808 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4809 4810 /* assemble parallel IS for sends */ 4811 i = 1; 4812 if (color) i=0; 4813 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4814 /* get back IS */ 4815 *is_sends = ranks_send_to; 4816 PetscFunctionReturn(0); 4817 } 4818 4819 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4820 4821 #undef __FUNCT__ 4822 #define __FUNCT__ "MatISSubassemble" 4823 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4824 { 4825 Mat local_mat; 4826 IS is_sends_internal; 4827 PetscInt rows,cols,new_local_rows; 4828 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4829 PetscBool ismatis,isdense,newisdense,destroy_mat; 4830 ISLocalToGlobalMapping l2gmap; 4831 PetscInt* l2gmap_indices; 4832 const PetscInt* is_indices; 4833 MatType new_local_type; 4834 /* buffers */ 4835 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4836 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4837 PetscInt *recv_buffer_idxs_local; 4838 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4839 /* MPI */ 4840 MPI_Comm comm,comm_n; 4841 PetscSubcomm subcomm; 4842 PetscMPIInt n_sends,n_recvs,commsize; 4843 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4844 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4845 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4846 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4847 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4848 PetscErrorCode ierr; 4849 4850 PetscFunctionBegin; 4851 /* TODO: add missing checks */ 4852 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4853 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4854 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4855 PetscValidLogicalCollectiveInt(mat,nis,7); 4856 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4857 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4858 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4859 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4860 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4861 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4862 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4863 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4864 PetscInt mrows,mcols,mnrows,mncols; 4865 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4866 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4867 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4868 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4869 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4870 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4871 } 4872 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4873 PetscValidLogicalCollectiveInt(mat,bs,0); 4874 /* prepare IS for sending if not provided */ 4875 if (!is_sends) { 4876 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4877 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4878 } else { 4879 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4880 is_sends_internal = is_sends; 4881 } 4882 4883 /* get comm */ 4884 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4885 4886 /* compute number of sends */ 4887 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4888 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4889 4890 /* compute number of receives */ 4891 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4892 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4893 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4894 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4895 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4896 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4897 ierr = PetscFree(iflags);CHKERRQ(ierr); 4898 4899 /* restrict comm if requested */ 4900 subcomm = 0; 4901 destroy_mat = PETSC_FALSE; 4902 if (restrict_comm) { 4903 PetscMPIInt color,subcommsize; 4904 4905 color = 0; 4906 if (restrict_full) { 4907 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4908 } else { 4909 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4910 } 4911 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4912 subcommsize = commsize - subcommsize; 4913 /* check if reuse has been requested */ 4914 if (reuse == MAT_REUSE_MATRIX) { 4915 if (*mat_n) { 4916 PetscMPIInt subcommsize2; 4917 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4918 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4919 comm_n = PetscObjectComm((PetscObject)*mat_n); 4920 } else { 4921 comm_n = PETSC_COMM_SELF; 4922 } 4923 } else { /* MAT_INITIAL_MATRIX */ 4924 PetscMPIInt rank; 4925 4926 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4927 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4928 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4929 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4930 comm_n = PetscSubcommChild(subcomm); 4931 } 4932 /* flag to destroy *mat_n if not significative */ 4933 if (color) destroy_mat = PETSC_TRUE; 4934 } else { 4935 comm_n = comm; 4936 } 4937 4938 /* prepare send/receive buffers */ 4939 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4940 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4941 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4942 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4943 if (nis) { 4944 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4945 } 4946 4947 /* Get data from local matrices */ 4948 if (!isdense) { 4949 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4950 /* TODO: See below some guidelines on how to prepare the local buffers */ 4951 /* 4952 send_buffer_vals should contain the raw values of the local matrix 4953 send_buffer_idxs should contain: 4954 - MatType_PRIVATE type 4955 - PetscInt size_of_l2gmap 4956 - PetscInt global_row_indices[size_of_l2gmap] 4957 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4958 */ 4959 } else { 4960 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4962 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4963 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4964 send_buffer_idxs[1] = i; 4965 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4966 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4967 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4968 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4969 for (i=0;i<n_sends;i++) { 4970 ilengths_vals[is_indices[i]] = len*len; 4971 ilengths_idxs[is_indices[i]] = len+2; 4972 } 4973 } 4974 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4975 /* additional is (if any) */ 4976 if (nis) { 4977 PetscMPIInt psum; 4978 PetscInt j; 4979 for (j=0,psum=0;j<nis;j++) { 4980 PetscInt plen; 4981 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4982 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4983 psum += len+1; /* indices + lenght */ 4984 } 4985 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4986 for (j=0,psum=0;j<nis;j++) { 4987 PetscInt plen; 4988 const PetscInt *is_array_idxs; 4989 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4990 send_buffer_idxs_is[psum] = plen; 4991 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4992 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4993 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4994 psum += plen+1; /* indices + lenght */ 4995 } 4996 for (i=0;i<n_sends;i++) { 4997 ilengths_idxs_is[is_indices[i]] = psum; 4998 } 4999 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5000 } 5001 5002 buf_size_idxs = 0; 5003 buf_size_vals = 0; 5004 buf_size_idxs_is = 0; 5005 for (i=0;i<n_recvs;i++) { 5006 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5007 buf_size_vals += (PetscInt)olengths_vals[i]; 5008 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5009 } 5010 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5011 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5012 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5013 5014 /* get new tags for clean communications */ 5015 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5016 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5017 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5018 5019 /* allocate for requests */ 5020 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5021 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5022 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5023 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5024 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5025 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5026 5027 /* communications */ 5028 ptr_idxs = recv_buffer_idxs; 5029 ptr_vals = recv_buffer_vals; 5030 ptr_idxs_is = recv_buffer_idxs_is; 5031 for (i=0;i<n_recvs;i++) { 5032 source_dest = onodes[i]; 5033 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5034 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5035 ptr_idxs += olengths_idxs[i]; 5036 ptr_vals += olengths_vals[i]; 5037 if (nis) { 5038 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5039 ptr_idxs_is += olengths_idxs_is[i]; 5040 } 5041 } 5042 for (i=0;i<n_sends;i++) { 5043 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5044 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5045 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5046 if (nis) { 5047 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5048 } 5049 } 5050 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5051 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5052 5053 /* assemble new l2g map */ 5054 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5055 ptr_idxs = recv_buffer_idxs; 5056 new_local_rows = 0; 5057 for (i=0;i<n_recvs;i++) { 5058 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5059 ptr_idxs += olengths_idxs[i]; 5060 } 5061 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5062 ptr_idxs = recv_buffer_idxs; 5063 new_local_rows = 0; 5064 for (i=0;i<n_recvs;i++) { 5065 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5066 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5067 ptr_idxs += olengths_idxs[i]; 5068 } 5069 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5070 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5071 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5072 5073 /* infer new local matrix type from received local matrices type */ 5074 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5075 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5076 if (n_recvs) { 5077 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5078 ptr_idxs = recv_buffer_idxs; 5079 for (i=0;i<n_recvs;i++) { 5080 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5081 new_local_type_private = MATAIJ_PRIVATE; 5082 break; 5083 } 5084 ptr_idxs += olengths_idxs[i]; 5085 } 5086 switch (new_local_type_private) { 5087 case MATDENSE_PRIVATE: 5088 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5089 new_local_type = MATSEQAIJ; 5090 bs = 1; 5091 } else { /* if I receive only 1 dense matrix */ 5092 new_local_type = MATSEQDENSE; 5093 bs = 1; 5094 } 5095 break; 5096 case MATAIJ_PRIVATE: 5097 new_local_type = MATSEQAIJ; 5098 bs = 1; 5099 break; 5100 case MATBAIJ_PRIVATE: 5101 new_local_type = MATSEQBAIJ; 5102 break; 5103 case MATSBAIJ_PRIVATE: 5104 new_local_type = MATSEQSBAIJ; 5105 break; 5106 default: 5107 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5108 break; 5109 } 5110 } else { /* by default, new_local_type is seqdense */ 5111 new_local_type = MATSEQDENSE; 5112 bs = 1; 5113 } 5114 5115 /* create MATIS object if needed */ 5116 if (reuse == MAT_INITIAL_MATRIX) { 5117 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5118 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5119 } else { 5120 /* it also destroys the local matrices */ 5121 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5122 } 5123 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5124 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5125 5126 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5127 5128 /* Global to local map of received indices */ 5129 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5130 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5131 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5132 5133 /* restore attributes -> type of incoming data and its size */ 5134 buf_size_idxs = 0; 5135 for (i=0;i<n_recvs;i++) { 5136 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5137 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5138 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5139 } 5140 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5141 5142 /* set preallocation */ 5143 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5144 if (!newisdense) { 5145 PetscInt *new_local_nnz=0; 5146 5147 ptr_vals = recv_buffer_vals; 5148 ptr_idxs = recv_buffer_idxs_local; 5149 if (n_recvs) { 5150 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5151 } 5152 for (i=0;i<n_recvs;i++) { 5153 PetscInt j; 5154 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5155 for (j=0;j<*(ptr_idxs+1);j++) { 5156 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5157 } 5158 } else { 5159 /* TODO */ 5160 } 5161 ptr_idxs += olengths_idxs[i]; 5162 } 5163 if (new_local_nnz) { 5164 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5165 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5166 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5167 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5168 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5169 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5170 } else { 5171 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5172 } 5173 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5174 } else { 5175 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5176 } 5177 5178 /* set values */ 5179 ptr_vals = recv_buffer_vals; 5180 ptr_idxs = recv_buffer_idxs_local; 5181 for (i=0;i<n_recvs;i++) { 5182 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5183 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5184 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5185 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5186 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5187 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5188 } else { 5189 /* TODO */ 5190 } 5191 ptr_idxs += olengths_idxs[i]; 5192 ptr_vals += olengths_vals[i]; 5193 } 5194 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5199 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5200 5201 #if 0 5202 if (!restrict_comm) { /* check */ 5203 Vec lvec,rvec; 5204 PetscReal infty_error; 5205 5206 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5207 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5208 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5209 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5210 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5211 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5212 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5213 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5214 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5215 } 5216 #endif 5217 5218 /* assemble new additional is (if any) */ 5219 if (nis) { 5220 PetscInt **temp_idxs,*count_is,j,psum; 5221 5222 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5223 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5224 ptr_idxs = recv_buffer_idxs_is; 5225 psum = 0; 5226 for (i=0;i<n_recvs;i++) { 5227 for (j=0;j<nis;j++) { 5228 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5229 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5230 psum += plen; 5231 ptr_idxs += plen+1; /* shift pointer to received data */ 5232 } 5233 } 5234 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5235 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5236 for (i=1;i<nis;i++) { 5237 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5238 } 5239 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5240 ptr_idxs = recv_buffer_idxs_is; 5241 for (i=0;i<n_recvs;i++) { 5242 for (j=0;j<nis;j++) { 5243 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5244 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5245 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5246 ptr_idxs += plen+1; /* shift pointer to received data */ 5247 } 5248 } 5249 for (i=0;i<nis;i++) { 5250 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5251 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5252 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5253 } 5254 ierr = PetscFree(count_is);CHKERRQ(ierr); 5255 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5256 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5257 } 5258 /* free workspace */ 5259 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5260 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5261 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5262 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5263 if (isdense) { 5264 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5265 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5266 } else { 5267 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5268 } 5269 if (nis) { 5270 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5271 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5272 } 5273 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5274 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5275 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5276 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5277 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5278 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5279 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5280 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5281 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5282 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5283 ierr = PetscFree(onodes);CHKERRQ(ierr); 5284 if (nis) { 5285 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5286 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5287 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5288 } 5289 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5290 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5291 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5292 for (i=0;i<nis;i++) { 5293 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5294 } 5295 *mat_n = NULL; 5296 } 5297 PetscFunctionReturn(0); 5298 } 5299 5300 /* temporary hack into ksp private data structure */ 5301 #include <petsc/private/kspimpl.h> 5302 5303 #undef __FUNCT__ 5304 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5305 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5306 { 5307 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5308 PC_IS *pcis = (PC_IS*)pc->data; 5309 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5310 MatNullSpace CoarseNullSpace=NULL; 5311 ISLocalToGlobalMapping coarse_islg; 5312 IS coarse_is,*isarray; 5313 PetscInt i,im_active=-1,active_procs=-1; 5314 PetscInt nis,nisdofs,nisneu,nisvert; 5315 PC pc_temp; 5316 PCType coarse_pc_type; 5317 KSPType coarse_ksp_type; 5318 PetscBool multilevel_requested,multilevel_allowed; 5319 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5320 Mat t_coarse_mat_is; 5321 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5322 PetscMPIInt all_procs; 5323 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5324 PetscBool compute_vecs = PETSC_FALSE; 5325 PetscScalar *array; 5326 PetscErrorCode ierr; 5327 5328 PetscFunctionBegin; 5329 /* Assign global numbering to coarse dofs */ 5330 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5331 PetscInt ocoarse_size; 5332 compute_vecs = PETSC_TRUE; 5333 ocoarse_size = pcbddc->coarse_size; 5334 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5335 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5336 /* see if we can avoid some work */ 5337 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5338 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5339 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5340 PC pc; 5341 PetscBool isbddc; 5342 5343 /* temporary workaround since PCBDDC does not have a reset method so far */ 5344 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5345 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5346 if (isbddc) { 5347 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5348 } else { 5349 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5350 } 5351 coarse_reuse = PETSC_FALSE; 5352 } else { /* we can safely reuse already computed coarse matrix */ 5353 coarse_reuse = PETSC_TRUE; 5354 } 5355 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5356 coarse_reuse = PETSC_FALSE; 5357 } 5358 /* reset any subassembling information */ 5359 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5360 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5361 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5362 coarse_reuse = PETSC_TRUE; 5363 } 5364 5365 /* count "active" (i.e. with positive local size) and "void" processes */ 5366 im_active = !!(pcis->n); 5367 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5368 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5369 void_procs = all_procs-active_procs; 5370 csin_type_simple = PETSC_TRUE; 5371 redist = PETSC_FALSE; 5372 if (pcbddc->current_level && void_procs) { 5373 csin_ml = PETSC_TRUE; 5374 ncoarse_ml = void_procs; 5375 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5376 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5377 csin_ds = PETSC_TRUE; 5378 ncoarse_ds = pcbddc->redistribute_coarse; 5379 redist = PETSC_TRUE; 5380 } else { 5381 csin_ds = PETSC_TRUE; 5382 ncoarse_ds = active_procs; 5383 redist = PETSC_TRUE; 5384 } 5385 } else { 5386 csin_ml = PETSC_FALSE; 5387 ncoarse_ml = all_procs; 5388 if (void_procs) { 5389 csin_ds = PETSC_TRUE; 5390 ncoarse_ds = void_procs; 5391 csin_type_simple = PETSC_FALSE; 5392 } else { 5393 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5394 csin_ds = PETSC_TRUE; 5395 ncoarse_ds = pcbddc->redistribute_coarse; 5396 redist = PETSC_TRUE; 5397 } else { 5398 csin_ds = PETSC_FALSE; 5399 ncoarse_ds = all_procs; 5400 } 5401 } 5402 } 5403 5404 /* 5405 test if we can go multilevel: three conditions must be satisfied: 5406 - we have not exceeded the number of levels requested 5407 - we can actually subassemble the active processes 5408 - we can find a suitable number of MPI processes where we can place the subassembled problem 5409 */ 5410 multilevel_allowed = PETSC_FALSE; 5411 multilevel_requested = PETSC_FALSE; 5412 if (pcbddc->current_level < pcbddc->max_levels) { 5413 multilevel_requested = PETSC_TRUE; 5414 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5415 multilevel_allowed = PETSC_FALSE; 5416 } else { 5417 multilevel_allowed = PETSC_TRUE; 5418 } 5419 } 5420 /* determine number of process partecipating to coarse solver */ 5421 if (multilevel_allowed) { 5422 ncoarse = ncoarse_ml; 5423 csin = csin_ml; 5424 redist = PETSC_FALSE; 5425 } else { 5426 ncoarse = ncoarse_ds; 5427 csin = csin_ds; 5428 } 5429 5430 /* creates temporary l2gmap and IS for coarse indexes */ 5431 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5432 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5433 5434 /* creates temporary MATIS object for coarse matrix */ 5435 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5436 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5437 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5438 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5439 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5440 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5441 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5442 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5443 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5444 5445 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5446 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5447 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5448 const PetscInt *idxs; 5449 ISLocalToGlobalMapping tmap; 5450 5451 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5452 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5453 /* allocate space for temporary storage */ 5454 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5455 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5456 /* allocate for IS array */ 5457 nisdofs = pcbddc->n_ISForDofsLocal; 5458 nisneu = !!pcbddc->NeumannBoundariesLocal; 5459 nisvert = 0; 5460 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5461 nisvert = 1; 5462 } 5463 nis = nisdofs + nisneu + nisvert; 5464 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5465 /* dofs splitting */ 5466 for (i=0;i<nisdofs;i++) { 5467 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5468 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5469 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5470 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5471 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5472 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5473 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5474 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5475 } 5476 /* neumann boundaries */ 5477 if (pcbddc->NeumannBoundariesLocal) { 5478 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5479 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5480 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5481 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5482 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5483 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5484 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5485 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5486 } 5487 /* primal vertices (benign) */ 5488 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5489 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5490 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5491 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5492 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5493 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5494 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5495 } 5496 /* free memory */ 5497 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5498 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5499 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5500 } else { 5501 nis = 0; 5502 nisdofs = 0; 5503 nisneu = 0; 5504 nisvert = 0; 5505 isarray = NULL; 5506 } 5507 /* destroy no longer needed map */ 5508 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5509 5510 /* restrict on coarse candidates (if needed) */ 5511 coarse_mat_is = NULL; 5512 if (csin) { 5513 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5514 if (redist) { 5515 PetscMPIInt rank; 5516 PetscInt spc,n_spc_p1,dest[1],destsize; 5517 5518 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5519 spc = active_procs/ncoarse; 5520 n_spc_p1 = active_procs%ncoarse; 5521 if (im_active) { 5522 destsize = 1; 5523 if (rank > n_spc_p1*(spc+1)-1) { 5524 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5525 } else { 5526 dest[0] = rank/(spc+1); 5527 } 5528 } else { 5529 destsize = 0; 5530 } 5531 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5532 } else if (csin_type_simple) { 5533 PetscMPIInt rank; 5534 PetscInt issize,isidx; 5535 5536 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5537 if (im_active) { 5538 issize = 1; 5539 isidx = (PetscInt)rank; 5540 } else { 5541 issize = 0; 5542 isidx = -1; 5543 } 5544 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5545 } else { /* get a suitable subassembling pattern from MATIS code */ 5546 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5547 } 5548 5549 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5550 if (!redist || ncoarse <= void_procs) { 5551 PetscInt ncoarse_cand,tissize,*nisindices; 5552 PetscInt *coarse_candidates; 5553 const PetscInt* tisindices; 5554 5555 /* get coarse candidates' ranks in pc communicator */ 5556 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5557 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5558 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5559 if (!coarse_candidates[i]) { 5560 coarse_candidates[ncoarse_cand++]=i; 5561 } 5562 } 5563 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5564 5565 5566 if (pcbddc->dbg_flag) { 5567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5569 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5571 for (i=0;i<ncoarse_cand;i++) { 5572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5573 } 5574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5576 } 5577 /* shift the pattern on coarse candidates */ 5578 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5579 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5580 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5581 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5582 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5583 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5584 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5585 } 5586 if (pcbddc->dbg_flag) { 5587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5589 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5590 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5591 } 5592 } 5593 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5594 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5595 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5596 } else { /* this is the last level, so use just receiving processes in subcomm */ 5597 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5598 } 5599 } else { 5600 if (pcbddc->dbg_flag) { 5601 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5602 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5603 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5604 } 5605 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5606 coarse_mat_is = t_coarse_mat_is; 5607 } 5608 5609 /* create local to global scatters for coarse problem */ 5610 if (compute_vecs) { 5611 PetscInt lrows; 5612 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5613 if (coarse_mat_is) { 5614 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5615 } else { 5616 lrows = 0; 5617 } 5618 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5619 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5620 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5621 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5622 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5623 } 5624 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5625 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5626 5627 /* set defaults for coarse KSP and PC */ 5628 if (multilevel_allowed) { 5629 coarse_ksp_type = KSPRICHARDSON; 5630 coarse_pc_type = PCBDDC; 5631 } else { 5632 coarse_ksp_type = KSPPREONLY; 5633 coarse_pc_type = PCREDUNDANT; 5634 } 5635 5636 /* print some info if requested */ 5637 if (pcbddc->dbg_flag) { 5638 if (!multilevel_allowed) { 5639 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5640 if (multilevel_requested) { 5641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5642 } else if (pcbddc->max_levels) { 5643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5644 } 5645 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5646 } 5647 } 5648 5649 /* create the coarse KSP object only once with defaults */ 5650 if (coarse_mat_is) { 5651 MatReuse coarse_mat_reuse; 5652 PetscViewer dbg_viewer = NULL; 5653 if (pcbddc->dbg_flag) { 5654 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5655 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5656 } 5657 if (!pcbddc->coarse_ksp) { 5658 char prefix[256],str_level[16]; 5659 size_t len; 5660 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5661 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5662 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5663 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5664 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5665 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5666 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5667 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5668 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5669 /* prefix */ 5670 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5671 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5672 if (!pcbddc->current_level) { 5673 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5674 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5675 } else { 5676 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5677 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5678 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5679 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5680 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5681 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5682 } 5683 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5684 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5685 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5686 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5687 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5688 /* allow user customization */ 5689 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5690 } 5691 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5692 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5693 if (nisdofs) { 5694 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5695 for (i=0;i<nisdofs;i++) { 5696 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5697 } 5698 } 5699 if (nisneu) { 5700 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5701 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5702 } 5703 if (nisvert) { 5704 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5705 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5706 } 5707 5708 /* get some info after set from options */ 5709 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5710 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5711 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5712 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5713 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5714 isbddc = PETSC_FALSE; 5715 } 5716 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5717 if (isredundant) { 5718 KSP inner_ksp; 5719 PC inner_pc; 5720 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5721 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5722 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5723 } 5724 5725 /* assemble coarse matrix */ 5726 if (coarse_reuse) { 5727 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5728 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5729 coarse_mat_reuse = MAT_REUSE_MATRIX; 5730 } else { 5731 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5732 } 5733 if (isbddc || isnn) { 5734 if (isbddc) { /* currently there are no APIs for these options */ 5735 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5736 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5737 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5738 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5739 if (pcbddc_coarse->benign_compute_nonetflux) { 5740 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5741 } 5742 } 5743 if (pcbddc->coarsening_ratio > 1) { 5744 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5745 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5746 if (pcbddc->dbg_flag) { 5747 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5748 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5749 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5750 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5751 } 5752 } 5753 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5754 } else { 5755 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5756 coarse_mat = coarse_mat_is; 5757 } 5758 } else { 5759 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5760 } 5761 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5762 5763 /* propagate symmetry info of coarse matrix */ 5764 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5765 if (pc->pmat->symmetric_set) { 5766 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5767 } 5768 if (pc->pmat->hermitian_set) { 5769 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5770 } 5771 if (pc->pmat->spd_set) { 5772 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5773 } 5774 /* set operators */ 5775 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5776 if (pcbddc->dbg_flag) { 5777 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5778 } 5779 } else { /* processes non partecipating to coarse solver (if any) */ 5780 coarse_mat = 0; 5781 } 5782 ierr = PetscFree(isarray);CHKERRQ(ierr); 5783 #if 0 5784 { 5785 PetscViewer viewer; 5786 char filename[256]; 5787 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5788 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5789 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5790 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5791 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5792 } 5793 #endif 5794 5795 /* Compute coarse null space (special handling by BDDC only) */ 5796 #if 0 5797 if (pcbddc->NullSpace) { 5798 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5799 } 5800 #endif 5801 /* hack */ 5802 if (pcbddc->coarse_ksp) { 5803 Vec crhs,csol; 5804 5805 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5806 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5807 if (!csol) { 5808 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5809 } 5810 if (!crhs) { 5811 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5812 } 5813 } 5814 5815 /* compute null space for coarse solver if the benign trick has been requested */ 5816 if (pcbddc->benign_null) { 5817 5818 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5819 for (i=0;i<pcbddc->benign_n;i++) { 5820 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5821 } 5822 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5823 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5824 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5825 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5826 if (coarse_mat) { 5827 Vec nullv; 5828 PetscScalar *array,*array2; 5829 PetscInt nl; 5830 5831 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5832 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5833 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5834 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5835 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5836 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5837 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5838 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5839 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5840 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5841 } 5842 } 5843 5844 if (pcbddc->coarse_ksp) { 5845 PetscBool ispreonly; 5846 5847 if (CoarseNullSpace) { 5848 PetscBool isnull; 5849 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5850 if (0) { 5851 if (isbddc && !pcbddc->benign_saddle_point) { 5852 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5853 } else { 5854 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5855 } 5856 } else { 5857 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5858 } 5859 } 5860 /* setup coarse ksp */ 5861 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5862 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5863 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5864 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5865 KSP check_ksp; 5866 KSPType check_ksp_type; 5867 PC check_pc; 5868 Vec check_vec,coarse_vec; 5869 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5870 PetscInt its; 5871 PetscBool compute_eigs; 5872 PetscReal *eigs_r,*eigs_c; 5873 PetscInt neigs; 5874 const char *prefix; 5875 5876 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5877 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5878 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5879 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5880 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5881 if (ispreonly) { 5882 check_ksp_type = KSPPREONLY; 5883 compute_eigs = PETSC_FALSE; 5884 } else { 5885 check_ksp_type = KSPGMRES; 5886 compute_eigs = PETSC_TRUE; 5887 } 5888 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5889 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5890 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5891 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5892 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5893 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5894 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5895 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5896 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5897 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5898 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5899 /* create random vec */ 5900 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5901 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5902 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5903 if (CoarseNullSpace) { 5904 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5905 } 5906 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5907 /* solve coarse problem */ 5908 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5909 if (CoarseNullSpace) { 5910 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5911 } 5912 /* set eigenvalue estimation if preonly has not been requested */ 5913 if (compute_eigs) { 5914 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5915 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5916 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5917 lambda_max = eigs_r[neigs-1]; 5918 lambda_min = eigs_r[0]; 5919 if (pcbddc->use_coarse_estimates) { 5920 if (lambda_max>lambda_min) { 5921 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5922 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5923 } 5924 } 5925 } 5926 5927 /* check coarse problem residual error */ 5928 if (pcbddc->dbg_flag) { 5929 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5930 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5931 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5932 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5933 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5934 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5935 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5936 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5937 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5938 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5939 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5940 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5941 if (CoarseNullSpace) { 5942 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5943 } 5944 if (compute_eigs) { 5945 PetscReal lambda_max_s,lambda_min_s; 5946 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5947 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5948 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5949 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5950 for (i=0;i<neigs;i++) { 5951 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5952 } 5953 } 5954 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5955 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5956 } 5957 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5958 if (compute_eigs) { 5959 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5960 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5961 } 5962 } 5963 } 5964 /* print additional info */ 5965 if (pcbddc->dbg_flag) { 5966 /* waits until all processes reaches this point */ 5967 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5970 } 5971 5972 /* free memory */ 5973 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5974 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5975 PetscFunctionReturn(0); 5976 } 5977 5978 #undef __FUNCT__ 5979 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5980 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5981 { 5982 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5983 PC_IS* pcis = (PC_IS*)pc->data; 5984 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5985 IS subset,subset_mult,subset_n; 5986 PetscInt local_size,coarse_size=0; 5987 PetscInt *local_primal_indices=NULL; 5988 const PetscInt *t_local_primal_indices; 5989 PetscErrorCode ierr; 5990 5991 PetscFunctionBegin; 5992 /* Compute global number of coarse dofs */ 5993 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5994 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5995 } 5996 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5997 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5998 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5999 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6000 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6001 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6002 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6003 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6004 if (local_size != pcbddc->local_primal_size) { 6005 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 6006 } 6007 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6008 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6009 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6010 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6011 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6012 6013 /* check numbering */ 6014 if (pcbddc->dbg_flag) { 6015 PetscScalar coarsesum,*array,*array2; 6016 PetscInt i; 6017 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6018 6019 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6020 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6022 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6023 /* counter */ 6024 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6025 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6026 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6027 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6028 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6029 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6030 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6031 for (i=0;i<pcbddc->local_primal_size;i++) { 6032 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6033 } 6034 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6035 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6036 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6037 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6038 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6039 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6040 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6041 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6042 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6043 for (i=0;i<pcis->n;i++) { 6044 if (array[i] != 0.0 && array[i] != array2[i]) { 6045 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6046 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6047 set_error = PETSC_TRUE; 6048 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6049 } 6050 } 6051 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6052 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6054 for (i=0;i<pcis->n;i++) { 6055 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6056 } 6057 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6058 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6059 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6060 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6061 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6063 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6064 PetscInt *gidxs; 6065 6066 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6067 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6071 for (i=0;i<pcbddc->local_primal_size;i++) { 6072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6073 } 6074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6075 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6076 } 6077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6078 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6079 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6080 } 6081 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6082 /* get back data */ 6083 *coarse_size_n = coarse_size; 6084 *local_primal_indices_n = local_primal_indices; 6085 PetscFunctionReturn(0); 6086 } 6087 6088 #undef __FUNCT__ 6089 #define __FUNCT__ "PCBDDCGlobalToLocal" 6090 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6091 { 6092 IS localis_t; 6093 PetscInt i,lsize,*idxs,n; 6094 PetscScalar *vals; 6095 PetscErrorCode ierr; 6096 6097 PetscFunctionBegin; 6098 /* get indices in local ordering exploiting local to global map */ 6099 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6100 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6101 for (i=0;i<lsize;i++) vals[i] = 1.0; 6102 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6103 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6104 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6105 if (idxs) { /* multilevel guard */ 6106 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6107 } 6108 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6109 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6110 ierr = PetscFree(vals);CHKERRQ(ierr); 6111 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6112 /* now compute set in local ordering */ 6113 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6114 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6115 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6116 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6117 for (i=0,lsize=0;i<n;i++) { 6118 if (PetscRealPart(vals[i]) > 0.5) { 6119 lsize++; 6120 } 6121 } 6122 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6123 for (i=0,lsize=0;i<n;i++) { 6124 if (PetscRealPart(vals[i]) > 0.5) { 6125 idxs[lsize++] = i; 6126 } 6127 } 6128 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6129 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6130 *localis = localis_t; 6131 PetscFunctionReturn(0); 6132 } 6133 6134 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 6135 #undef __FUNCT__ 6136 #define __FUNCT__ "PCBDDCMatMult_Private" 6137 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 6138 { 6139 PCBDDCChange_ctx change_ctx; 6140 PetscErrorCode ierr; 6141 6142 PetscFunctionBegin; 6143 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6144 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6145 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6146 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6147 PetscFunctionReturn(0); 6148 } 6149 6150 #undef __FUNCT__ 6151 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 6152 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 6153 { 6154 PCBDDCChange_ctx change_ctx; 6155 PetscErrorCode ierr; 6156 6157 PetscFunctionBegin; 6158 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 6159 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 6160 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 6161 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 6162 PetscFunctionReturn(0); 6163 } 6164 6165 #undef __FUNCT__ 6166 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6167 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6168 { 6169 PC_IS *pcis=(PC_IS*)pc->data; 6170 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6171 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6172 Mat S_j; 6173 PetscInt *used_xadj,*used_adjncy; 6174 PetscBool free_used_adj; 6175 PetscErrorCode ierr; 6176 6177 PetscFunctionBegin; 6178 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6179 free_used_adj = PETSC_FALSE; 6180 if (pcbddc->sub_schurs_layers == -1) { 6181 used_xadj = NULL; 6182 used_adjncy = NULL; 6183 } else { 6184 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6185 used_xadj = pcbddc->mat_graph->xadj; 6186 used_adjncy = pcbddc->mat_graph->adjncy; 6187 } else if (pcbddc->computed_rowadj) { 6188 used_xadj = pcbddc->mat_graph->xadj; 6189 used_adjncy = pcbddc->mat_graph->adjncy; 6190 } else { 6191 PetscBool flg_row=PETSC_FALSE; 6192 const PetscInt *xadj,*adjncy; 6193 PetscInt nvtxs; 6194 6195 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6196 if (flg_row) { 6197 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6198 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6199 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6200 free_used_adj = PETSC_TRUE; 6201 } else { 6202 pcbddc->sub_schurs_layers = -1; 6203 used_xadj = NULL; 6204 used_adjncy = NULL; 6205 } 6206 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6207 } 6208 } 6209 6210 /* setup sub_schurs data */ 6211 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6212 if (!sub_schurs->schur_explicit) { 6213 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6214 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6215 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6216 } else { 6217 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6218 PetscBool isseqaij; 6219 PetscInt benign_n; 6220 Mat change = NULL; 6221 IS change_primal = NULL; 6222 6223 if (!pcbddc->use_vertices && reuse_solvers) { 6224 PetscInt n_vertices; 6225 6226 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6227 reuse_solvers = (PetscBool)!n_vertices; 6228 } 6229 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6230 if (!isseqaij) { 6231 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6232 if (matis->A == pcbddc->local_mat) { 6233 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6234 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6235 } else { 6236 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6237 } 6238 } 6239 if (!pcbddc->benign_change_explicit) { 6240 benign_n = pcbddc->benign_n; 6241 } else { 6242 benign_n = 0; 6243 } 6244 /* If the user defines additional constraints, we import them here. 6245 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6246 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6247 PC_IS *pcisf; 6248 PC_BDDC *pcbddcf; 6249 PC pcf; 6250 6251 if (pcbddc->sub_schurs_rebuild) { 6252 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6253 } 6254 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6255 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6256 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6257 /* hacks */ 6258 pcisf = (PC_IS*)pcf->data; 6259 pcisf->is_B_local = pcis->is_B_local; 6260 pcisf->vec1_N = pcis->vec1_N; 6261 pcisf->BtoNmap = pcis->BtoNmap; 6262 pcisf->n = pcis->n; 6263 pcisf->n_B = pcis->n_B; 6264 pcbddcf = (PC_BDDC*)pcf->data; 6265 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6266 pcbddcf->mat_graph = pcbddc->mat_graph; 6267 pcbddcf->use_faces = PETSC_TRUE; 6268 pcbddcf->use_change_of_basis = PETSC_TRUE; 6269 pcbddcf->use_change_on_faces = PETSC_TRUE; 6270 pcbddcf->use_qr_single = PETSC_TRUE; 6271 pcbddcf->fake_change = PETSC_TRUE; 6272 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6273 /* store information on primal vertices and change of basis (in local numbering) */ 6274 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6275 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6276 change = pcbddcf->ConstraintMatrix; 6277 pcbddcf->ConstraintMatrix = NULL; 6278 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6279 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6280 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6281 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6282 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6283 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6284 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6285 pcf->ops->destroy = NULL; 6286 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6287 } 6288 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6289 ierr = MatDestroy(&change);CHKERRQ(ierr); 6290 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6291 } 6292 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6293 6294 /* free adjacency */ 6295 if (free_used_adj) { 6296 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6297 } 6298 PetscFunctionReturn(0); 6299 } 6300 6301 #undef __FUNCT__ 6302 #define __FUNCT__ "PCBDDCInitSubSchurs" 6303 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6304 { 6305 PC_IS *pcis=(PC_IS*)pc->data; 6306 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6307 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6308 PCBDDCGraph graph; 6309 PetscErrorCode ierr; 6310 6311 PetscFunctionBegin; 6312 /* attach interface graph for determining subsets */ 6313 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6314 IS verticesIS,verticescomm; 6315 PetscInt vsize,*idxs; 6316 6317 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6318 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6319 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6320 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6321 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6322 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6323 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6324 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6325 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6326 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6327 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6328 /* 6329 if (pcbddc->dbg_flag) { 6330 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6331 } 6332 */ 6333 } else { 6334 graph = pcbddc->mat_graph; 6335 } 6336 6337 /* sub_schurs init */ 6338 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6339 6340 /* free graph struct */ 6341 if (pcbddc->sub_schurs_rebuild) { 6342 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6343 } 6344 PetscFunctionReturn(0); 6345 } 6346 6347 #undef __FUNCT__ 6348 #define __FUNCT__ "PCBDDCCheckOperator" 6349 PetscErrorCode PCBDDCCheckOperator(PC pc) 6350 { 6351 PC_IS *pcis=(PC_IS*)pc->data; 6352 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6353 PetscErrorCode ierr; 6354 6355 PetscFunctionBegin; 6356 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6357 IS zerodiag = NULL; 6358 Mat S_j,B0_B=NULL; 6359 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6360 PetscScalar *p0_check,*array,*array2; 6361 PetscReal norm; 6362 PetscInt i; 6363 6364 /* B0 and B0_B */ 6365 if (zerodiag) { 6366 IS dummy; 6367 6368 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6369 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6370 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6371 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6372 } 6373 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6374 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6375 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6376 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6377 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6378 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6379 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6380 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6381 /* S_j */ 6382 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6383 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6384 6385 /* mimic vector in \widetilde{W}_\Gamma */ 6386 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6387 /* continuous in primal space */ 6388 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6389 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6390 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6391 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6392 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6393 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6394 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6395 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6396 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6397 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6398 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6399 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6400 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6401 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6402 6403 /* assemble rhs for coarse problem */ 6404 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6405 /* local with Schur */ 6406 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6407 if (zerodiag) { 6408 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6409 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6410 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6411 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6412 } 6413 /* sum on primal nodes the local contributions */ 6414 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6415 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6416 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6417 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6418 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6419 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6420 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6421 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6422 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6423 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6424 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6425 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6426 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6427 /* scale primal nodes (BDDC sums contibutions) */ 6428 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6429 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6430 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6433 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6434 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6435 /* global: \widetilde{B0}_B w_\Gamma */ 6436 if (zerodiag) { 6437 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6438 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6439 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6440 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6441 } 6442 /* BDDC */ 6443 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6444 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6445 6446 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6447 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6448 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6449 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6450 for (i=0;i<pcbddc->benign_n;i++) { 6451 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6452 } 6453 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6454 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6455 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6456 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6457 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6458 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6459 } 6460 PetscFunctionReturn(0); 6461 } 6462