1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 recompute_zerodiag = PETSC_FALSE; 587 has_null_pressures = PETSC_FALSE; 588 have_null = PETSC_FALSE; 589 } 590 591 /* final check for null pressures */ 592 if (zerodiag && pressures) { 593 PetscInt nz,np; 594 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 595 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 596 if (nz != np) have_null = PETSC_FALSE; 597 } 598 599 if (recompute_zerodiag) { 600 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 601 if (pcbddc->benign_n == 1) { 602 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 603 zerodiag = zerodiag_subs[0]; 604 } else { 605 PetscInt i,nzn,*new_idxs; 606 607 nzn = 0; 608 for (i=0;i<pcbddc->benign_n;i++) { 609 PetscInt ns; 610 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 611 nzn += ns; 612 } 613 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns,*idxs; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 620 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 621 nzn += ns; 622 } 623 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 625 } 626 have_null = PETSC_FALSE; 627 } 628 629 /* no-net-flux */ 630 if (pcbddc->benign_compute_nonetflux) { 631 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 632 MatNullSpace near_null_space; 633 ISLocalToGlobalMapping rmap; 634 Vec quad_vec; 635 PetscScalar *pvals; 636 PetscInt i,np,*dummyins; 637 638 /* create vector to hold quadrature weights */ 639 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 640 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 641 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 642 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 643 644 /* compute B^{(i)T} * 1_p */ 645 np = 0; 646 if (zerodiag) { 647 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 648 } else if (pressures) { 649 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 650 } 651 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 652 for (i=0;i<np;i++) pvals[i] = 1.; 653 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 654 if (np) { 655 const PetscInt *pidxs; 656 657 if (zerodiag) { 658 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 659 } else if (pressures) { 660 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 661 } 662 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 663 if (zerodiag) { 664 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 } 669 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 670 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 672 ierr = PetscFree(pvals);CHKERRQ(ierr); 673 674 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 675 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 676 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 677 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 678 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 679 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 680 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 681 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 682 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 683 for (i=0;i<matis->sf_nleaves;i++) { 684 if (PetscGlobalRank != matis->sf_leafdata[i]) { 685 pvals[i] = 0.; 686 } 687 } 688 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 689 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 690 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 692 ierr = PetscFree(dummyins);CHKERRQ(ierr); 693 694 /* assembly quadrature vec and attach near null space to pmat */ 695 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 696 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 697 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 699 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 700 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 701 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 702 } 703 704 /* change of basis and p0 dofs */ 705 if (has_null_pressures) { 706 IS zerodiagc; 707 const PetscInt *idxs,*idxsc; 708 PetscInt i,s,*nnz; 709 710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 713 /* local change of basis for pressures */ 714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 720 for (i=0;i<pcbddc->benign_n;i++) { 721 PetscInt nzs,j; 722 723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 728 } 729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 730 ierr = PetscFree(nnz);CHKERRQ(ierr); 731 /* set identity on velocities */ 732 for (i=0;i<n-nz;i++) { 733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 734 } 735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 739 /* set change on pressures */ 740 for (s=0;s<pcbddc->benign_n;s++) { 741 PetscScalar *array; 742 PetscInt nzs; 743 744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 746 for (i=0;i<nzs-1;i++) { 747 PetscScalar vals[2]; 748 PetscInt cols[2]; 749 750 cols[0] = idxs[i]; 751 cols[1] = idxs[nzs-1]; 752 vals[0] = 1.; 753 vals[1] = 1.; 754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 } 756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 757 for (i=0;i<nzs-1;i++) array[i] = -1.; 758 array[nzs-1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 760 /* store local idxs for p0 */ 761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 763 ierr = PetscFree(array);CHKERRQ(ierr); 764 } 765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* project if needed */ 768 if (pcbddc->benign_change_explicit) { 769 Mat M; 770 771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 774 ierr = MatDestroy(&M);CHKERRQ(ierr); 775 } 776 /* store global idxs for p0 */ 777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 778 } 779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 781 782 /* determines if the coarse solver will be singular or not */ 783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 784 *zerodiaglocal = zerodiag; 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 791 { 792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (!pcbddc->benign_sf) { 797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 798 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 799 } 800 if (get) { /* use SF to get values */ 801 PetscScalar *array; 802 803 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 804 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 805 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 807 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 808 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 817 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 818 { 819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 /* TODO: add error checking 824 - avoid nested pop (or push) calls. 825 - cannot push before pop. 826 - cannot call this if pcbddc->local_mat is NULL 827 */ 828 if (!pcbddc->benign_n) { 829 PetscFunctionReturn(0); 830 } 831 if (pop) { 832 if (pcbddc->benign_change_explicit) { 833 IS is_p0; 834 MatReuse reuse; 835 836 /* extract B_0 */ 837 reuse = MAT_INITIAL_MATRIX; 838 if (pcbddc->benign_B0) { 839 reuse = MAT_REUSE_MATRIX; 840 } 841 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 843 /* remove rows and cols from local problem */ 844 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 845 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 846 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 848 } else { 849 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 850 PetscScalar *vals; 851 PetscInt i,n,*idxs_ins; 852 853 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 854 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 855 if (!pcbddc->benign_B0) { 856 PetscInt *nnz; 857 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 858 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 859 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 860 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 861 for (i=0;i<pcbddc->benign_n;i++) { 862 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 863 nnz[i] = n - nnz[i]; 864 } 865 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 866 ierr = PetscFree(nnz);CHKERRQ(ierr); 867 } 868 869 for (i=0;i<pcbddc->benign_n;i++) { 870 PetscScalar *array; 871 PetscInt *idxs,j,nz,cum; 872 873 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 875 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 876 for (j=0;j<nz;j++) vals[j] = 1.; 877 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 878 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 879 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 880 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 881 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 883 cum = 0; 884 for (j=0;j<n;j++) { 885 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 886 vals[cum] = array[j]; 887 idxs_ins[cum] = j; 888 cum++; 889 } 890 } 891 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 892 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 898 } 899 } else { /* push */ 900 if (pcbddc->benign_change_explicit) { 901 PetscInt i; 902 903 for (i=0;i<pcbddc->benign_n;i++) { 904 PetscScalar *B0_vals; 905 PetscInt *B0_cols,B0_ncol; 906 907 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 908 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 909 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 911 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 } else { 916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "PCBDDCAdaptiveSelection" 924 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 925 { 926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 927 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 928 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 929 PetscBLASInt *B_iwork,*B_ifail; 930 PetscScalar *work,lwork; 931 PetscScalar *St,*S,*eigv; 932 PetscScalar *Sarray,*Starray; 933 PetscReal *eigs,thresh; 934 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 935 PetscBool allocated_S_St; 936 #if defined(PETSC_USE_COMPLEX) 937 PetscReal *rwork; 938 #endif 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 if (!sub_schurs->schur_explicit) { 943 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 944 } 945 946 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 947 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 948 } 949 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 955 } 956 957 if (pcbddc->dbg_flag) { 958 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 959 } 960 961 /* max size of subsets */ 962 mss = 0; 963 for (i=0;i<sub_schurs->n_subs;i++) { 964 PetscInt subset_size; 965 966 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 967 mss = PetscMax(mss,subset_size); 968 } 969 970 /* min/max and threshold */ 971 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 972 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 973 nmax = PetscMax(nmin,nmax); 974 allocated_S_St = PETSC_FALSE; 975 if (nmin) { 976 allocated_S_St = PETSC_TRUE; 977 } 978 979 /* allocate lapack workspace */ 980 cum = cum2 = 0; 981 maxneigs = 0; 982 for (i=0;i<sub_schurs->n_subs;i++) { 983 PetscInt n,subset_size; 984 985 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 986 n = PetscMin(subset_size,nmax); 987 cum += subset_size; 988 cum2 += subset_size*n; 989 maxneigs = PetscMax(maxneigs,n); 990 } 991 if (mss) { 992 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 993 PetscBLASInt B_itype = 1; 994 PetscBLASInt B_N = mss; 995 PetscReal zero = 0.0; 996 PetscReal eps = 0.0; /* dlamch? */ 997 998 B_lwork = -1; 999 S = NULL; 1000 St = NULL; 1001 eigs = NULL; 1002 eigv = NULL; 1003 B_iwork = NULL; 1004 B_ifail = NULL; 1005 #if defined(PETSC_USE_COMPLEX) 1006 rwork = NULL; 1007 #endif 1008 thresh = 1.0; 1009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1010 #if defined(PETSC_USE_COMPLEX) 1011 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1012 #else 1013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1014 #endif 1015 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1016 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1017 } else { 1018 /* TODO */ 1019 } 1020 } else { 1021 lwork = 0; 1022 } 1023 1024 nv = 0; 1025 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1026 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1027 } 1028 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1029 if (allocated_S_St) { 1030 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1031 } 1032 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_COMPLEX) 1034 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1035 #endif 1036 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1037 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1039 nv+cum,&pcbddc->adaptive_constraints_idxs, 1040 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1041 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1042 1043 maxneigs = 0; 1044 cum = cumarray = 0; 1045 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1046 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1047 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1048 const PetscInt *idxs; 1049 1050 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1051 for (cum=0;cum<nv;cum++) { 1052 pcbddc->adaptive_constraints_n[cum] = 1; 1053 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1054 pcbddc->adaptive_constraints_data[cum] = 1.0; 1055 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1056 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1057 } 1058 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1059 } 1060 1061 if (mss) { /* multilevel */ 1062 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1064 } 1065 1066 thresh = pcbddc->adaptive_threshold; 1067 for (i=0;i<sub_schurs->n_subs;i++) { 1068 const PetscInt *idxs; 1069 PetscReal infty = PETSC_MAX_REAL; 1070 PetscInt j,subset_size,eigs_start = 0; 1071 PetscBLASInt B_N; 1072 PetscBool same_data = PETSC_FALSE; 1073 1074 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1075 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1076 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1077 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1078 if (sub_schurs->is_hermitian) { 1079 PetscInt j,k; 1080 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1081 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1082 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } 1084 for (j=0;j<subset_size;j++) { 1085 for (k=j;k<subset_size;k++) { 1086 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1087 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1088 } 1089 } 1090 } else { 1091 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1092 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1093 } 1094 } else { 1095 S = Sarray + cumarray; 1096 St = Starray + cumarray; 1097 } 1098 /* see if we can save some work */ 1099 if (sub_schurs->n_subs == 1) { 1100 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1101 } 1102 1103 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1104 B_neigs = 0; 1105 } else { 1106 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1107 PetscBLASInt B_itype = 1; 1108 PetscBLASInt B_IL, B_IU; 1109 PetscReal eps = -1.0; /* dlamch? */ 1110 PetscInt nmin_s; 1111 1112 if (pcbddc->dbg_flag) { 1113 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1114 } 1115 1116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1117 if (thresh > 1.+PETSC_SMALL) { 1118 1119 /* ask for eigenvalues larger than thresh */ 1120 #if defined(PETSC_USE_COMPLEX) 1121 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1122 #else 1123 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1124 #endif 1125 } else { 1126 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1127 B_IL = 1; 1128 #if defined(PETSC_USE_COMPLEX) 1129 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1130 #else 1131 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1132 #endif 1133 } 1134 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1135 if (B_ierr) { 1136 if (B_ierr < 0 ) { 1137 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1138 } else if (B_ierr <= B_N) { 1139 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1140 } else { 1141 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1142 } 1143 } 1144 1145 if (B_neigs > nmax) { 1146 if (pcbddc->dbg_flag) { 1147 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1148 } 1149 eigs_start = B_neigs -nmax; 1150 B_neigs = nmax; 1151 } 1152 1153 nmin_s = PetscMin(nmin,B_N); 1154 if (B_neigs < nmin_s) { 1155 PetscBLASInt B_neigs2; 1156 1157 B_IU = B_N - B_neigs; 1158 B_IL = B_N - nmin_s + 1; 1159 if (pcbddc->dbg_flag) { 1160 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1161 } 1162 if (sub_schurs->is_hermitian) { 1163 PetscInt j; 1164 for (j=0;j<subset_size;j++) { 1165 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1166 } 1167 for (j=0;j<subset_size;j++) { 1168 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1169 } 1170 } else { 1171 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1172 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1173 } 1174 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1175 #if defined(PETSC_USE_COMPLEX) 1176 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1177 #else 1178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1179 #endif 1180 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1181 B_neigs += B_neigs2; 1182 } 1183 if (B_ierr) { 1184 if (B_ierr < 0 ) { 1185 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1186 } else if (B_ierr <= B_N) { 1187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1188 } else { 1189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1190 } 1191 } 1192 if (pcbddc->dbg_flag) { 1193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1194 for (j=0;j<B_neigs;j++) { 1195 if (eigs[j] == 0.0) { 1196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1197 } else { 1198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1199 } 1200 } 1201 } 1202 } else { 1203 /* TODO */ 1204 } 1205 } 1206 /* change the basis back to the original one */ 1207 if (sub_schurs->change) { 1208 Mat change,phi,phit; 1209 1210 if (pcbddc->dbg_flag > 1) { 1211 PetscInt ii; 1212 for (ii=0;ii<B_neigs;ii++) { 1213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1214 for (j=0;j<B_N;j++) { 1215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1216 } 1217 } 1218 } 1219 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1220 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1221 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1222 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1223 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1224 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1225 } 1226 maxneigs = PetscMax(B_neigs,maxneigs); 1227 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1228 if (B_neigs) { 1229 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1230 1231 if (pcbddc->dbg_flag > 1) { 1232 PetscInt ii; 1233 for (ii=0;ii<B_neigs;ii++) { 1234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1235 for (j=0;j<B_N;j++) { 1236 #if defined(PETSC_USE_COMPLEX) 1237 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1238 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1240 #else 1241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1242 #endif 1243 } 1244 } 1245 } 1246 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1247 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1248 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1249 cum++; 1250 } 1251 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1252 /* shift for next computation */ 1253 cumarray += subset_size*subset_size; 1254 } 1255 if (pcbddc->dbg_flag) { 1256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 } 1258 1259 if (mss) { 1260 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1261 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1262 /* destroy matrices (junk) */ 1263 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1264 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1265 } 1266 if (allocated_S_St) { 1267 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1268 } 1269 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1270 #if defined(PETSC_USE_COMPLEX) 1271 ierr = PetscFree(rwork);CHKERRQ(ierr); 1272 #endif 1273 if (pcbddc->dbg_flag) { 1274 PetscInt maxneigs_r; 1275 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "PCBDDCSetUpSolvers" 1283 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1284 { 1285 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1286 PetscScalar *coarse_submat_vals; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1291 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1292 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1293 1294 /* Setup local neumann solver ksp_R */ 1295 /* PCBDDCSetUpLocalScatters should be called first! */ 1296 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1297 1298 /* Change global null space passed in by the user if change of basis has been requested */ 1299 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1300 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1301 } 1302 1303 /* 1304 Setup local correction and local part of coarse basis. 1305 Gives back the dense local part of the coarse matrix in column major ordering 1306 */ 1307 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1308 1309 /* Compute total number of coarse nodes and setup coarse solver */ 1310 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1311 1312 /* free */ 1313 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "PCBDDCResetCustomization" 1319 PetscErrorCode PCBDDCResetCustomization(PC pc) 1320 { 1321 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1326 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1327 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1328 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1329 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1330 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1331 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1332 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1333 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1334 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1335 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1336 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "PCBDDCResetTopography" 1342 PetscErrorCode PCBDDCResetTopography(PC pc) 1343 { 1344 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1345 PetscInt i; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1350 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1351 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1352 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1353 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1354 for (i=0;i<pcbddc->n_local_subs;i++) { 1355 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1356 } 1357 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1358 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "PCBDDCResetSolvers" 1364 PetscErrorCode PCBDDCResetSolvers(PC pc) 1365 { 1366 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1371 if (pcbddc->coarse_phi_B) { 1372 PetscScalar *array; 1373 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1374 ierr = PetscFree(array);CHKERRQ(ierr); 1375 } 1376 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1377 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1378 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1380 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1381 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1382 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1383 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1384 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1385 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1386 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1387 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1388 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1389 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1390 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1391 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1392 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1393 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1394 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1395 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1396 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1397 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1398 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1399 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1400 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1401 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1402 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1403 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1404 if (pcbddc->benign_zerodiag_subs) { 1405 PetscInt i; 1406 for (i=0;i<pcbddc->benign_n;i++) { 1407 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1408 } 1409 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1410 } 1411 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1417 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1418 { 1419 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1420 PC_IS *pcis = (PC_IS*)pc->data; 1421 VecType impVecType; 1422 PetscInt n_constraints,n_R,old_size; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 if (!pcbddc->ConstraintMatrix) { 1427 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1428 } 1429 /* get sizes */ 1430 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1431 n_R = pcis->n - pcbddc->n_vertices; 1432 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1433 /* local work vectors (try to avoid unneeded work)*/ 1434 /* R nodes */ 1435 old_size = -1; 1436 if (pcbddc->vec1_R) { 1437 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1438 } 1439 if (n_R != old_size) { 1440 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1441 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1442 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1443 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1444 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1445 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1446 } 1447 /* local primal dofs */ 1448 old_size = -1; 1449 if (pcbddc->vec1_P) { 1450 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1451 } 1452 if (pcbddc->local_primal_size != old_size) { 1453 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1454 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1455 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1456 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1457 } 1458 /* local explicit constraints */ 1459 old_size = -1; 1460 if (pcbddc->vec1_C) { 1461 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1462 } 1463 if (n_constraints && n_constraints != old_size) { 1464 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1465 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1466 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1467 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "PCBDDCSetUpCorrection" 1474 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1475 { 1476 PetscErrorCode ierr; 1477 /* pointers to pcis and pcbddc */ 1478 PC_IS* pcis = (PC_IS*)pc->data; 1479 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1480 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1481 /* submatrices of local problem */ 1482 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1483 /* submatrices of local coarse problem */ 1484 Mat S_VV,S_CV,S_VC,S_CC; 1485 /* working matrices */ 1486 Mat C_CR; 1487 /* additional working stuff */ 1488 PC pc_R; 1489 Mat F; 1490 Vec dummy_vec; 1491 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1492 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1493 PetscScalar *work; 1494 PetscInt *idx_V_B; 1495 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1496 PetscInt i,n_R,n_D,n_B; 1497 1498 /* some shortcuts to scalars */ 1499 PetscScalar one=1.0,m_one=-1.0; 1500 1501 PetscFunctionBegin; 1502 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1503 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1504 } 1505 1506 /* Set Non-overlapping dimensions */ 1507 n_vertices = pcbddc->n_vertices; 1508 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1509 n_B = pcis->n_B; 1510 n_D = pcis->n - n_B; 1511 n_R = pcis->n - n_vertices; 1512 1513 /* vertices in boundary numbering */ 1514 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1515 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1516 if (i != n_vertices) { 1517 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1518 } 1519 1520 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1521 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1522 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1523 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1524 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1525 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1526 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1527 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1528 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1529 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1530 1531 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1532 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1535 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1536 lda_rhs = n_R; 1537 need_benign_correction = PETSC_FALSE; 1538 if (isLU || isILU || isCHOL) { 1539 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1540 } else if (sub_schurs->reuse_solver) { 1541 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1542 MatFactorType type; 1543 1544 F = reuse_solver->F; 1545 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1546 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1547 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1548 need_benign_correction = !!reuse_solver->benign_n; 1549 } else { 1550 F = NULL; 1551 } 1552 1553 /* allocate workspace */ 1554 n = 0; 1555 if (n_constraints) { 1556 n += lda_rhs*n_constraints; 1557 } 1558 if (n_vertices) { 1559 n = PetscMax(2*lda_rhs*n_vertices,n); 1560 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1561 } 1562 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1563 1564 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1565 dummy_vec = NULL; 1566 if (need_benign_correction && lda_rhs != n_R && F) { 1567 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1568 } 1569 1570 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1571 if (n_constraints) { 1572 Mat M1,M2,M3,C_B; 1573 IS is_aux; 1574 PetscScalar *array,*array2; 1575 1576 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1577 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1578 1579 /* Extract constraints on R nodes: C_{CR} */ 1580 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1581 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1582 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1583 1584 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1585 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1586 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1587 for (i=0;i<n_constraints;i++) { 1588 const PetscScalar *row_cmat_values; 1589 const PetscInt *row_cmat_indices; 1590 PetscInt size_of_constraint,j; 1591 1592 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1593 for (j=0;j<size_of_constraint;j++) { 1594 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1595 } 1596 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1597 } 1598 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1599 if (F) { 1600 Mat B; 1601 1602 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1603 if (need_benign_correction) { 1604 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1605 1606 /* rhs is already zero on interior dofs, no need to change the rhs */ 1607 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1608 } 1609 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1610 if (need_benign_correction) { 1611 PetscScalar *marr; 1612 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1613 1614 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1615 if (lda_rhs != n_R) { 1616 for (i=0;i<n_constraints;i++) { 1617 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1618 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1619 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1620 } 1621 } else { 1622 for (i=0;i<n_constraints;i++) { 1623 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1624 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1626 } 1627 } 1628 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1629 } 1630 ierr = MatDestroy(&B);CHKERRQ(ierr); 1631 } else { 1632 PetscScalar *marr; 1633 1634 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1635 for (i=0;i<n_constraints;i++) { 1636 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1637 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1638 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1639 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1640 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1641 } 1642 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1643 } 1644 if (!pcbddc->switch_static) { 1645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1646 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1647 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1648 for (i=0;i<n_constraints;i++) { 1649 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1650 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1651 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1654 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1655 } 1656 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1657 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1658 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1659 } else { 1660 if (lda_rhs != n_R) { 1661 IS dummy; 1662 1663 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1664 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1665 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1666 } else { 1667 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1668 pcbddc->local_auxmat2 = local_auxmat2_R; 1669 } 1670 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1671 } 1672 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1673 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1674 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1675 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1677 if (isCHOL) { 1678 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1679 } else { 1680 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1681 } 1682 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1683 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1684 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1685 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1686 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1687 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1688 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1689 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1690 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1691 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1692 } 1693 1694 /* Get submatrices from subdomain matrix */ 1695 if (n_vertices) { 1696 IS is_aux; 1697 1698 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1699 IS tis; 1700 1701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1702 ierr = ISSort(tis);CHKERRQ(ierr); 1703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1705 } else { 1706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1707 } 1708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1712 } 1713 1714 /* Matrix of coarse basis functions (local) */ 1715 if (pcbddc->coarse_phi_B) { 1716 PetscInt on_B,on_primal,on_D=n_D; 1717 if (pcbddc->coarse_phi_D) { 1718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1719 } 1720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1722 PetscScalar *marray; 1723 1724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1725 ierr = PetscFree(marray);CHKERRQ(ierr); 1726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1730 } 1731 } 1732 1733 if (!pcbddc->coarse_phi_B) { 1734 PetscScalar *marray; 1735 1736 n = n_B*pcbddc->local_primal_size; 1737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1738 n += n_D*pcbddc->local_primal_size; 1739 } 1740 if (!pcbddc->symmetric_primal) { 1741 n *= 2; 1742 } 1743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1745 n = n_B*pcbddc->local_primal_size; 1746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1748 n += n_D*pcbddc->local_primal_size; 1749 } 1750 if (!pcbddc->symmetric_primal) { 1751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1753 n = n_B*pcbddc->local_primal_size; 1754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1755 } 1756 } else { 1757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1762 } 1763 } 1764 } 1765 1766 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1767 p0_lidx_I = NULL; 1768 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1769 const PetscInt *idxs; 1770 1771 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1772 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1773 for (i=0;i<pcbddc->benign_n;i++) { 1774 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1775 } 1776 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1777 } 1778 1779 /* vertices */ 1780 if (n_vertices) { 1781 1782 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1783 1784 if (n_R) { 1785 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1786 PetscBLASInt B_N,B_one = 1; 1787 PetscScalar *x,*y; 1788 PetscBool isseqaij; 1789 1790 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1791 if (need_benign_correction) { 1792 ISLocalToGlobalMapping RtoN; 1793 IS is_p0; 1794 PetscInt *idxs_p0,n; 1795 1796 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1797 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1798 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1799 if (n != pcbddc->benign_n) { 1800 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1801 } 1802 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1803 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1804 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1805 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1806 } 1807 1808 if (lda_rhs == n_R) { 1809 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1810 } else { 1811 PetscScalar *av,*array; 1812 const PetscInt *xadj,*adjncy; 1813 PetscInt n; 1814 PetscBool flg_row; 1815 1816 array = work+lda_rhs*n_vertices; 1817 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1818 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1819 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1820 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1821 for (i=0;i<n;i++) { 1822 PetscInt j; 1823 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1824 } 1825 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1826 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1827 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1828 } 1829 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1830 if (need_benign_correction) { 1831 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1832 PetscScalar *marr; 1833 1834 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1835 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1836 1837 | 0 0 0 | (V) 1838 L = | 0 0 -1 | (P-p0) 1839 | 0 0 -1 | (p0) 1840 1841 */ 1842 for (i=0;i<reuse_solver->benign_n;i++) { 1843 const PetscScalar *vals; 1844 const PetscInt *idxs,*idxs_zero; 1845 PetscInt n,j,nz; 1846 1847 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1848 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1849 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1850 for (j=0;j<n;j++) { 1851 PetscScalar val = vals[j]; 1852 PetscInt k,col = idxs[j]; 1853 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1854 } 1855 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1856 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1857 } 1858 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1859 } 1860 if (F) { 1861 /* need to correct the rhs */ 1862 if (need_benign_correction) { 1863 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1864 PetscScalar *marr; 1865 1866 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1867 if (lda_rhs != n_R) { 1868 for (i=0;i<n_vertices;i++) { 1869 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1870 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1871 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1872 } 1873 } else { 1874 for (i=0;i<n_vertices;i++) { 1875 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1876 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1877 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1878 } 1879 } 1880 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1881 } 1882 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1883 /* need to correct the solution */ 1884 if (need_benign_correction) { 1885 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1886 PetscScalar *marr; 1887 1888 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1889 if (lda_rhs != n_R) { 1890 for (i=0;i<n_vertices;i++) { 1891 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1892 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1893 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1894 } 1895 } else { 1896 for (i=0;i<n_vertices;i++) { 1897 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1898 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1899 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1900 } 1901 } 1902 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1903 } 1904 } else { 1905 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1906 for (i=0;i<n_vertices;i++) { 1907 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1908 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1909 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1910 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1911 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1912 } 1913 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1914 } 1915 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1916 /* S_VV and S_CV */ 1917 if (n_constraints) { 1918 Mat B; 1919 1920 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1921 for (i=0;i<n_vertices;i++) { 1922 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1923 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1924 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1925 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1926 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1928 } 1929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1930 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1931 ierr = MatDestroy(&B);CHKERRQ(ierr); 1932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1933 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1934 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1935 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1936 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1937 ierr = MatDestroy(&B);CHKERRQ(ierr); 1938 } 1939 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1940 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1941 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1942 } 1943 if (lda_rhs != n_R) { 1944 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1946 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1947 } 1948 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1949 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1950 if (need_benign_correction) { 1951 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1952 PetscScalar *marr,*sums; 1953 1954 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1955 ierr = MatDenseGetArray(S_VVt,&marr); 1956 for (i=0;i<reuse_solver->benign_n;i++) { 1957 const PetscScalar *vals; 1958 const PetscInt *idxs,*idxs_zero; 1959 PetscInt n,j,nz; 1960 1961 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1962 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1963 for (j=0;j<n_vertices;j++) { 1964 PetscInt k; 1965 sums[j] = 0.; 1966 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1967 } 1968 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1969 for (j=0;j<n;j++) { 1970 PetscScalar val = vals[j]; 1971 PetscInt k; 1972 for (k=0;k<n_vertices;k++) { 1973 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1974 } 1975 } 1976 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1977 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1978 } 1979 ierr = PetscFree(sums);CHKERRQ(ierr); 1980 ierr = MatDenseRestoreArray(S_VVt,&marr); 1981 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1982 } 1983 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1985 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1986 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1987 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1988 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1989 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1990 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1991 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1992 } else { 1993 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1994 } 1995 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1996 1997 /* coarse basis functions */ 1998 for (i=0;i<n_vertices;i++) { 1999 PetscScalar *y; 2000 2001 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2002 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2003 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2004 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2005 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2006 y[n_B*i+idx_V_B[i]] = 1.0; 2007 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2009 2010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2011 PetscInt j; 2012 2013 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2014 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2015 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2018 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2019 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2020 } 2021 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2022 } 2023 /* if n_R == 0 the object is not destroyed */ 2024 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2025 } 2026 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2027 2028 if (n_constraints) { 2029 Mat B; 2030 2031 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2032 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2033 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2034 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2035 if (n_vertices) { 2036 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2037 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2038 } else { 2039 Mat S_VCt; 2040 2041 if (lda_rhs != n_R) { 2042 ierr = MatDestroy(&B);CHKERRQ(ierr); 2043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2044 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2045 } 2046 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2047 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2048 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2049 } 2050 } 2051 ierr = MatDestroy(&B);CHKERRQ(ierr); 2052 /* coarse basis functions */ 2053 for (i=0;i<n_constraints;i++) { 2054 PetscScalar *y; 2055 2056 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2057 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2058 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2059 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2062 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2063 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2064 PetscInt j; 2065 2066 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2067 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2068 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2071 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2072 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2073 } 2074 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2075 } 2076 } 2077 if (n_constraints) { 2078 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2081 2082 /* coarse matrix entries relative to B_0 */ 2083 if (pcbddc->benign_n) { 2084 Mat B0_B,B0_BPHI; 2085 IS is_dummy; 2086 PetscScalar *data; 2087 PetscInt j; 2088 2089 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2090 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2091 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2092 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2093 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2094 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2095 for (j=0;j<pcbddc->benign_n;j++) { 2096 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2097 for (i=0;i<pcbddc->local_primal_size;i++) { 2098 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2099 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2100 } 2101 } 2102 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2103 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2104 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2105 } 2106 2107 /* compute other basis functions for non-symmetric problems */ 2108 if (!pcbddc->symmetric_primal) { 2109 Mat B_V=NULL,B_C=NULL; 2110 PetscScalar *marray; 2111 2112 if (n_constraints) { 2113 Mat S_CCT,C_CRT; 2114 2115 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2116 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2117 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2118 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2119 if (n_vertices) { 2120 Mat S_VCT; 2121 2122 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2123 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2124 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2125 } 2126 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2127 } 2128 if (n_vertices && n_R) { 2129 PetscScalar *av,*marray; 2130 const PetscInt *xadj,*adjncy; 2131 PetscInt n; 2132 PetscBool flg_row; 2133 2134 /* B_V = B_V - A_VR^T */ 2135 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2136 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2137 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2138 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2139 for (i=0;i<n;i++) { 2140 PetscInt j; 2141 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2142 } 2143 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2144 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2145 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2146 } 2147 2148 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2149 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2150 for (i=0;i<n_vertices;i++) { 2151 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2152 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2153 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2154 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2155 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2156 } 2157 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2158 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2159 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2160 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2161 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2162 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2163 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2164 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2165 } 2166 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2167 /* coarse basis functions */ 2168 for (i=0;i<pcbddc->local_primal_size;i++) { 2169 PetscScalar *y; 2170 2171 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2172 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2173 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2174 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2175 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2176 if (i<n_vertices) { 2177 y[n_B*i+idx_V_B[i]] = 1.0; 2178 } 2179 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2180 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2181 2182 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2183 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2185 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2186 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2188 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2189 } 2190 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2191 } 2192 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2193 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2194 } 2195 /* free memory */ 2196 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2197 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2198 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2199 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2200 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2201 ierr = PetscFree(work);CHKERRQ(ierr); 2202 if (n_vertices) { 2203 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2204 } 2205 if (n_constraints) { 2206 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2207 } 2208 /* Checking coarse_sub_mat and coarse basis functios */ 2209 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2210 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2211 if (pcbddc->dbg_flag) { 2212 Mat coarse_sub_mat; 2213 Mat AUXMAT,TM1,TM2,TM3,TM4; 2214 Mat coarse_phi_D,coarse_phi_B; 2215 Mat coarse_psi_D,coarse_psi_B; 2216 Mat A_II,A_BB,A_IB,A_BI; 2217 Mat C_B,CPHI; 2218 IS is_dummy; 2219 Vec mones; 2220 MatType checkmattype=MATSEQAIJ; 2221 PetscReal real_value; 2222 2223 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2224 Mat A; 2225 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2226 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2227 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2228 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2229 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2230 ierr = MatDestroy(&A);CHKERRQ(ierr); 2231 } else { 2232 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2233 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2234 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2235 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2236 } 2237 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2238 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2239 if (!pcbddc->symmetric_primal) { 2240 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2241 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2242 } 2243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2244 2245 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2247 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2248 if (!pcbddc->symmetric_primal) { 2249 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2250 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2251 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2252 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2253 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2254 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2255 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2256 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2257 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2258 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2259 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2260 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2261 } else { 2262 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2263 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2264 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2265 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2266 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2267 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2268 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2269 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2270 } 2271 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2272 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2273 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2274 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2275 if (pcbddc->benign_n) { 2276 Mat B0_B,B0_BPHI; 2277 PetscScalar *data,*data2; 2278 PetscInt j; 2279 2280 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2281 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2282 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2283 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2284 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2285 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2286 for (j=0;j<pcbddc->benign_n;j++) { 2287 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2288 for (i=0;i<pcbddc->local_primal_size;i++) { 2289 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2290 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2291 } 2292 } 2293 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2294 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2295 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2296 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2297 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2298 } 2299 #if 0 2300 { 2301 PetscViewer viewer; 2302 char filename[256]; 2303 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2304 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2305 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2306 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2307 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2308 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2309 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2310 if (save_change) { 2311 Mat phi_B; 2312 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2313 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2314 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2315 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2316 } else { 2317 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2318 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2319 } 2320 if (pcbddc->coarse_phi_D) { 2321 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2322 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2323 } 2324 if (pcbddc->coarse_psi_B) { 2325 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2326 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2327 } 2328 if (pcbddc->coarse_psi_D) { 2329 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2330 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2331 } 2332 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2333 } 2334 #endif 2335 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2336 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2337 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2339 2340 /* check constraints */ 2341 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2342 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2343 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2344 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2345 } else { 2346 PetscScalar *data; 2347 Mat tmat; 2348 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2349 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2350 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2351 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2352 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2353 } 2354 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2355 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2356 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2357 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2359 if (!pcbddc->symmetric_primal) { 2360 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2361 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2362 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2363 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2365 } 2366 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2367 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2368 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2369 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2370 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2371 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2372 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2373 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2374 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2375 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2376 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2377 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2378 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2379 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2380 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2381 if (!pcbddc->symmetric_primal) { 2382 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2383 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2384 } 2385 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2386 } 2387 /* get back data */ 2388 *coarse_submat_vals_n = coarse_submat_vals; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 #undef __FUNCT__ 2393 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2394 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2395 { 2396 Mat *work_mat; 2397 IS isrow_s,iscol_s; 2398 PetscBool rsorted,csorted; 2399 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2404 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2405 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2406 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2407 2408 if (!rsorted) { 2409 const PetscInt *idxs; 2410 PetscInt *idxs_sorted,i; 2411 2412 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2413 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2414 for (i=0;i<rsize;i++) { 2415 idxs_perm_r[i] = i; 2416 } 2417 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2418 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2419 for (i=0;i<rsize;i++) { 2420 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2421 } 2422 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2423 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2424 } else { 2425 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2426 isrow_s = isrow; 2427 } 2428 2429 if (!csorted) { 2430 if (isrow == iscol) { 2431 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2432 iscol_s = isrow_s; 2433 } else { 2434 const PetscInt *idxs; 2435 PetscInt *idxs_sorted,i; 2436 2437 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2438 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2439 for (i=0;i<csize;i++) { 2440 idxs_perm_c[i] = i; 2441 } 2442 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2443 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2444 for (i=0;i<csize;i++) { 2445 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2446 } 2447 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2448 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2449 } 2450 } else { 2451 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2452 iscol_s = iscol; 2453 } 2454 2455 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2456 2457 if (!rsorted || !csorted) { 2458 Mat new_mat; 2459 IS is_perm_r,is_perm_c; 2460 2461 if (!rsorted) { 2462 PetscInt *idxs_r,i; 2463 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2464 for (i=0;i<rsize;i++) { 2465 idxs_r[idxs_perm_r[i]] = i; 2466 } 2467 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2468 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2469 } else { 2470 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2471 } 2472 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2473 2474 if (!csorted) { 2475 if (isrow_s == iscol_s) { 2476 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2477 is_perm_c = is_perm_r; 2478 } else { 2479 PetscInt *idxs_c,i; 2480 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2481 for (i=0;i<csize;i++) { 2482 idxs_c[idxs_perm_c[i]] = i; 2483 } 2484 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2485 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2486 } 2487 } else { 2488 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2489 } 2490 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2491 2492 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2493 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2494 work_mat[0] = new_mat; 2495 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2496 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2497 } 2498 2499 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2500 *B = work_mat[0]; 2501 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2502 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2503 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2509 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2510 { 2511 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2512 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2513 Mat new_mat; 2514 IS is_local,is_global; 2515 PetscInt local_size; 2516 PetscBool isseqaij; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2521 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2522 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2523 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2524 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2525 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2526 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2527 2528 /* check */ 2529 if (pcbddc->dbg_flag) { 2530 Vec x,x_change; 2531 PetscReal error; 2532 2533 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2534 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2535 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2536 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2537 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2538 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2539 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2540 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2541 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2542 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2545 ierr = VecDestroy(&x);CHKERRQ(ierr); 2546 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2547 } 2548 2549 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2550 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2551 if (isseqaij) { 2552 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2553 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2554 } else { 2555 Mat work_mat; 2556 2557 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2558 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2559 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2560 } 2561 if (matis->A->symmetric_set) { 2562 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2563 #if !defined(PETSC_USE_COMPLEX) 2564 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2565 #endif 2566 } 2567 /* 2568 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2569 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2570 */ 2571 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 #undef __FUNCT__ 2576 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2577 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2578 { 2579 PC_IS* pcis = (PC_IS*)(pc->data); 2580 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2581 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2582 PetscInt *idx_R_local=NULL; 2583 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2584 PetscInt vbs,bs; 2585 PetscBT bitmask=NULL; 2586 PetscErrorCode ierr; 2587 2588 PetscFunctionBegin; 2589 /* 2590 No need to setup local scatters if 2591 - primal space is unchanged 2592 AND 2593 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2594 AND 2595 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2596 */ 2597 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2598 PetscFunctionReturn(0); 2599 } 2600 /* destroy old objects */ 2601 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2602 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2603 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2604 /* Set Non-overlapping dimensions */ 2605 n_B = pcis->n_B; 2606 n_D = pcis->n - n_B; 2607 n_vertices = pcbddc->n_vertices; 2608 2609 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2610 2611 /* create auxiliary bitmask and allocate workspace */ 2612 if (!sub_schurs->reuse_solver) { 2613 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2614 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2615 for (i=0;i<n_vertices;i++) { 2616 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2617 } 2618 2619 for (i=0, n_R=0; i<pcis->n; i++) { 2620 if (!PetscBTLookup(bitmask,i)) { 2621 idx_R_local[n_R++] = i; 2622 } 2623 } 2624 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2625 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2626 2627 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2628 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2629 } 2630 2631 /* Block code */ 2632 vbs = 1; 2633 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2634 if (bs>1 && !(n_vertices%bs)) { 2635 PetscBool is_blocked = PETSC_TRUE; 2636 PetscInt *vary; 2637 if (!sub_schurs->reuse_solver) { 2638 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2639 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2640 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2641 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2642 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2643 for (i=0; i<pcis->n/bs; i++) { 2644 if (vary[i]!=0 && vary[i]!=bs) { 2645 is_blocked = PETSC_FALSE; 2646 break; 2647 } 2648 } 2649 ierr = PetscFree(vary);CHKERRQ(ierr); 2650 } else { 2651 /* Verify directly the R set */ 2652 for (i=0; i<n_R/bs; i++) { 2653 PetscInt j,node=idx_R_local[bs*i]; 2654 for (j=1; j<bs; j++) { 2655 if (node != idx_R_local[bs*i+j]-j) { 2656 is_blocked = PETSC_FALSE; 2657 break; 2658 } 2659 } 2660 } 2661 } 2662 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2663 vbs = bs; 2664 for (i=0;i<n_R/vbs;i++) { 2665 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2666 } 2667 } 2668 } 2669 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2670 if (sub_schurs->reuse_solver) { 2671 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2672 2673 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2674 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2675 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2676 reuse_solver->is_R = pcbddc->is_R_local; 2677 } else { 2678 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2679 } 2680 2681 /* print some info if requested */ 2682 if (pcbddc->dbg_flag) { 2683 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2684 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2685 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2690 } 2691 2692 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2693 if (!sub_schurs->reuse_solver) { 2694 IS is_aux1,is_aux2; 2695 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2696 2697 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2698 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2699 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2700 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2701 for (i=0; i<n_D; i++) { 2702 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2703 } 2704 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2705 for (i=0, j=0; i<n_R; i++) { 2706 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2707 aux_array1[j++] = i; 2708 } 2709 } 2710 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2711 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2712 for (i=0, j=0; i<n_B; i++) { 2713 if (!PetscBTLookup(bitmask,is_indices[i])) { 2714 aux_array2[j++] = i; 2715 } 2716 } 2717 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2718 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2719 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2720 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2721 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2722 2723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2724 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2725 for (i=0, j=0; i<n_R; i++) { 2726 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2727 aux_array1[j++] = i; 2728 } 2729 } 2730 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2731 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2732 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2733 } 2734 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2735 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2736 } else { 2737 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2738 IS tis; 2739 PetscInt schur_size; 2740 2741 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2742 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2743 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2744 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2745 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2746 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2747 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2748 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2749 } 2750 } 2751 PetscFunctionReturn(0); 2752 } 2753 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2757 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2758 { 2759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2760 PC_IS *pcis = (PC_IS*)pc->data; 2761 PC pc_temp; 2762 Mat A_RR; 2763 MatReuse reuse; 2764 PetscScalar m_one = -1.0; 2765 PetscReal value; 2766 PetscInt n_D,n_R; 2767 PetscBool use_exact,use_exact_reduced,issbaij; 2768 PetscErrorCode ierr; 2769 /* prefixes stuff */ 2770 char dir_prefix[256],neu_prefix[256],str_level[16]; 2771 size_t len; 2772 2773 PetscFunctionBegin; 2774 2775 /* compute prefixes */ 2776 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2777 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2778 if (!pcbddc->current_level) { 2779 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2780 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2781 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2782 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2783 } else { 2784 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2785 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2786 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2787 len -= 15; /* remove "pc_bddc_coarse_" */ 2788 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2789 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2790 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2791 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2792 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2793 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2794 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2795 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2796 } 2797 2798 /* DIRICHLET PROBLEM */ 2799 if (dirichlet) { 2800 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2801 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2802 if (!sub_schurs->reuse_solver) { 2803 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2804 } 2805 if (pcbddc->dbg_flag) { 2806 Mat A_IIn; 2807 2808 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2809 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2810 pcis->A_II = A_IIn; 2811 } 2812 } 2813 if (pcbddc->local_mat->symmetric_set) { 2814 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2815 } 2816 /* Matrix for Dirichlet problem is pcis->A_II */ 2817 n_D = pcis->n - pcis->n_B; 2818 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2819 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2820 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2821 /* default */ 2822 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2823 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2824 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2825 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2826 if (issbaij) { 2827 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2828 } else { 2829 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2830 } 2831 /* Allow user's customization */ 2832 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2833 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2834 } 2835 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2836 if (sub_schurs->reuse_solver) { 2837 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2838 2839 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2840 } 2841 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2842 if (!n_D) { 2843 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2844 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2845 } 2846 /* Set Up KSP for Dirichlet problem of BDDC */ 2847 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2848 /* set ksp_D into pcis data */ 2849 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2850 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2851 pcis->ksp_D = pcbddc->ksp_D; 2852 } 2853 2854 /* NEUMANN PROBLEM */ 2855 A_RR = 0; 2856 if (neumann) { 2857 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2858 PetscInt ibs,mbs; 2859 PetscBool issbaij; 2860 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2861 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2862 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2863 if (pcbddc->ksp_R) { /* already created ksp */ 2864 PetscInt nn_R; 2865 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2866 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2867 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2868 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2869 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2870 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2871 reuse = MAT_INITIAL_MATRIX; 2872 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2873 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2874 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2875 reuse = MAT_INITIAL_MATRIX; 2876 } else { /* safe to reuse the matrix */ 2877 reuse = MAT_REUSE_MATRIX; 2878 } 2879 } 2880 /* last check */ 2881 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2882 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2883 reuse = MAT_INITIAL_MATRIX; 2884 } 2885 } else { /* first time, so we need to create the matrix */ 2886 reuse = MAT_INITIAL_MATRIX; 2887 } 2888 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2889 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2890 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2891 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2892 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2893 if (matis->A == pcbddc->local_mat) { 2894 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2895 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2896 } else { 2897 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2898 } 2899 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2900 if (matis->A == pcbddc->local_mat) { 2901 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2902 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2903 } else { 2904 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2905 } 2906 } 2907 /* extract A_RR */ 2908 if (sub_schurs->reuse_solver) { 2909 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2910 2911 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2912 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2913 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2914 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2915 } else { 2916 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2917 } 2918 } else { 2919 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2920 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2921 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2922 } 2923 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2924 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2925 } 2926 if (pcbddc->local_mat->symmetric_set) { 2927 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2928 } 2929 if (!pcbddc->ksp_R) { /* create object if not present */ 2930 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2931 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2932 /* default */ 2933 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2934 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2935 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2936 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2937 if (issbaij) { 2938 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2939 } else { 2940 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2941 } 2942 /* Allow user's customization */ 2943 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2944 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2945 } 2946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2947 if (!n_R) { 2948 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2950 } 2951 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2952 /* Reuse solver if it is present */ 2953 if (sub_schurs->reuse_solver) { 2954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2955 2956 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2957 } 2958 /* Set Up KSP for Neumann problem of BDDC */ 2959 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2960 } 2961 2962 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2963 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2964 if (pcbddc->dbg_flag) { 2965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2966 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2967 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2968 } 2969 if (dirichlet) { /* Dirichlet */ 2970 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2971 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2972 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2973 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2974 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2975 /* need to be adapted? */ 2976 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2977 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2978 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2979 /* print info */ 2980 if (pcbddc->dbg_flag) { 2981 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2983 } 2984 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2985 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2986 } 2987 } 2988 if (neumann) { /* Neumann */ 2989 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2990 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2991 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2992 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2993 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2994 /* need to be adapted? */ 2995 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2996 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2997 /* print info */ 2998 if (pcbddc->dbg_flag) { 2999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3001 } 3002 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3003 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3004 } 3005 } 3006 } 3007 /* free Neumann problem's matrix */ 3008 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3014 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3015 { 3016 PetscErrorCode ierr; 3017 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3018 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3019 3020 PetscFunctionBegin; 3021 if (!sub_schurs->reuse_solver) { 3022 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3023 } 3024 if (!pcbddc->switch_static) { 3025 if (applytranspose && pcbddc->local_auxmat1) { 3026 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3027 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3028 } 3029 if (!sub_schurs->reuse_solver) { 3030 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3031 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 } else { 3033 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3034 3035 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3036 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 } 3038 } else { 3039 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3040 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3041 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3043 if (applytranspose && pcbddc->local_auxmat1) { 3044 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3045 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3046 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 } 3049 } 3050 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3051 if (applytranspose) { 3052 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3053 } else { 3054 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3055 } 3056 } else { 3057 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3058 3059 if (applytranspose) { 3060 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3061 } else { 3062 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3063 } 3064 } 3065 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3066 if (!pcbddc->switch_static) { 3067 if (!sub_schurs->reuse_solver) { 3068 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3070 } else { 3071 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3072 3073 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3074 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3075 } 3076 if (!applytranspose && pcbddc->local_auxmat1) { 3077 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3078 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3079 } 3080 } else { 3081 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3082 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 if (!applytranspose && pcbddc->local_auxmat1) { 3086 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3087 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3088 } 3089 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3091 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3092 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3093 } 3094 PetscFunctionReturn(0); 3095 } 3096 3097 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3100 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3101 { 3102 PetscErrorCode ierr; 3103 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3104 PC_IS* pcis = (PC_IS*) (pc->data); 3105 const PetscScalar zero = 0.0; 3106 3107 PetscFunctionBegin; 3108 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3109 if (!pcbddc->benign_apply_coarse_only) { 3110 if (applytranspose) { 3111 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3112 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3113 } else { 3114 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3115 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3116 } 3117 } else { 3118 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3119 } 3120 3121 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3122 if (pcbddc->benign_n) { 3123 PetscScalar *array; 3124 PetscInt j; 3125 3126 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3127 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3128 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3129 } 3130 3131 /* start communications from local primal nodes to rhs of coarse solver */ 3132 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3133 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3134 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3135 3136 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3137 /* TODO remove null space when doing multilevel */ 3138 if (pcbddc->coarse_ksp) { 3139 Mat coarse_mat; 3140 Vec rhs,sol; 3141 MatNullSpace nullsp; 3142 3143 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3144 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3145 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3146 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3147 if (nullsp) { 3148 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3149 } 3150 if (applytranspose) { 3151 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3152 } else { 3153 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3154 } 3155 if (nullsp) { 3156 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3157 } 3158 } 3159 3160 /* Local solution on R nodes */ 3161 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3162 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3163 } 3164 /* communications from coarse sol to local primal nodes */ 3165 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3166 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 3168 /* Sum contributions from the two levels */ 3169 if (!pcbddc->benign_apply_coarse_only) { 3170 if (applytranspose) { 3171 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3172 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3173 } else { 3174 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3175 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3176 } 3177 /* store p0 */ 3178 if (pcbddc->benign_n) { 3179 PetscScalar *array; 3180 PetscInt j; 3181 3182 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3183 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3184 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3185 } 3186 } else { /* expand the coarse solution */ 3187 if (applytranspose) { 3188 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3189 } else { 3190 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3191 } 3192 } 3193 PetscFunctionReturn(0); 3194 } 3195 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3198 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3199 { 3200 PetscErrorCode ierr; 3201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3202 PetscScalar *array; 3203 Vec from,to; 3204 3205 PetscFunctionBegin; 3206 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3207 from = pcbddc->coarse_vec; 3208 to = pcbddc->vec1_P; 3209 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3210 Vec tvec; 3211 3212 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3213 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3214 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3215 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3216 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3217 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3218 } 3219 } else { /* from local to global -> put data in coarse right hand side */ 3220 from = pcbddc->vec1_P; 3221 to = pcbddc->coarse_vec; 3222 } 3223 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3229 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3230 { 3231 PetscErrorCode ierr; 3232 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3233 PetscScalar *array; 3234 Vec from,to; 3235 3236 PetscFunctionBegin; 3237 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3238 from = pcbddc->coarse_vec; 3239 to = pcbddc->vec1_P; 3240 } else { /* from local to global -> put data in coarse right hand side */ 3241 from = pcbddc->vec1_P; 3242 to = pcbddc->coarse_vec; 3243 } 3244 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3245 if (smode == SCATTER_FORWARD) { 3246 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3247 Vec tvec; 3248 3249 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3250 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3251 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3252 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3253 } 3254 } else { 3255 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3256 ierr = VecResetArray(from);CHKERRQ(ierr); 3257 } 3258 } 3259 PetscFunctionReturn(0); 3260 } 3261 3262 /* uncomment for testing purposes */ 3263 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3266 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3267 { 3268 PetscErrorCode ierr; 3269 PC_IS* pcis = (PC_IS*)(pc->data); 3270 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3271 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3272 /* one and zero */ 3273 PetscScalar one=1.0,zero=0.0; 3274 /* space to store constraints and their local indices */ 3275 PetscScalar *constraints_data; 3276 PetscInt *constraints_idxs,*constraints_idxs_B; 3277 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3278 PetscInt *constraints_n; 3279 /* iterators */ 3280 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3281 /* BLAS integers */ 3282 PetscBLASInt lwork,lierr; 3283 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3284 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3285 /* reuse */ 3286 PetscInt olocal_primal_size,olocal_primal_size_cc; 3287 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3288 /* change of basis */ 3289 PetscBool qr_needed; 3290 PetscBT change_basis,qr_needed_idx; 3291 /* auxiliary stuff */ 3292 PetscInt *nnz,*is_indices; 3293 PetscInt ncc; 3294 /* some quantities */ 3295 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3296 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3297 3298 PetscFunctionBegin; 3299 /* Destroy Mat objects computed previously */ 3300 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3301 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3302 /* save info on constraints from previous setup (if any) */ 3303 olocal_primal_size = pcbddc->local_primal_size; 3304 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3305 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3306 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3307 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3308 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3309 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3310 3311 /* print some info */ 3312 if (pcbddc->dbg_flag) { 3313 IS vertices; 3314 PetscInt nv,nedges,nfaces; 3315 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3316 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3317 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3318 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3319 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3324 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3325 } 3326 3327 if (!pcbddc->adaptive_selection) { 3328 IS ISForVertices,*ISForFaces,*ISForEdges; 3329 MatNullSpace nearnullsp; 3330 const Vec *nearnullvecs; 3331 Vec *localnearnullsp; 3332 PetscScalar *array; 3333 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3334 PetscBool nnsp_has_cnst; 3335 /* LAPACK working arrays for SVD or POD */ 3336 PetscBool skip_lapack,boolforchange; 3337 PetscScalar *work; 3338 PetscReal *singular_vals; 3339 #if defined(PETSC_USE_COMPLEX) 3340 PetscReal *rwork; 3341 #endif 3342 #if defined(PETSC_MISSING_LAPACK_GESVD) 3343 PetscScalar *temp_basis,*correlation_mat; 3344 #else 3345 PetscBLASInt dummy_int=1; 3346 PetscScalar dummy_scalar=1.; 3347 #endif 3348 3349 /* Get index sets for faces, edges and vertices from graph */ 3350 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3351 /* free unneeded index sets */ 3352 if (!pcbddc->use_vertices) { 3353 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3354 } 3355 if (!pcbddc->use_edges) { 3356 for (i=0;i<n_ISForEdges;i++) { 3357 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3358 } 3359 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3360 n_ISForEdges = 0; 3361 } 3362 if (!pcbddc->use_faces) { 3363 for (i=0;i<n_ISForFaces;i++) { 3364 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3365 } 3366 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3367 n_ISForFaces = 0; 3368 } 3369 3370 #if defined(PETSC_USE_DEBUG) 3371 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3372 Also use_change_of_basis should be consistent among processors */ 3373 if (pcbddc->NullSpace) { 3374 PetscBool tbool[2],gbool[2]; 3375 3376 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3377 pcbddc->use_change_of_basis = PETSC_TRUE; 3378 if (!ISForEdges) { 3379 pcbddc->use_change_on_faces = PETSC_TRUE; 3380 } 3381 } 3382 tbool[0] = pcbddc->use_change_of_basis; 3383 tbool[1] = pcbddc->use_change_on_faces; 3384 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3385 pcbddc->use_change_of_basis = gbool[0]; 3386 pcbddc->use_change_on_faces = gbool[1]; 3387 } 3388 #endif 3389 3390 /* check if near null space is attached to global mat */ 3391 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3392 if (nearnullsp) { 3393 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3394 /* remove any stored info */ 3395 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3396 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3397 /* store information for BDDC solver reuse */ 3398 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3399 pcbddc->onearnullspace = nearnullsp; 3400 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3401 for (i=0;i<nnsp_size;i++) { 3402 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3403 } 3404 } else { /* if near null space is not provided BDDC uses constants by default */ 3405 nnsp_size = 0; 3406 nnsp_has_cnst = PETSC_TRUE; 3407 } 3408 /* get max number of constraints on a single cc */ 3409 max_constraints = nnsp_size; 3410 if (nnsp_has_cnst) max_constraints++; 3411 3412 /* 3413 Evaluate maximum storage size needed by the procedure 3414 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3415 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3416 There can be multiple constraints per connected component 3417 */ 3418 n_vertices = 0; 3419 if (ISForVertices) { 3420 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3421 } 3422 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3423 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3424 3425 total_counts = n_ISForFaces+n_ISForEdges; 3426 total_counts *= max_constraints; 3427 total_counts += n_vertices; 3428 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3429 3430 total_counts = 0; 3431 max_size_of_constraint = 0; 3432 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3433 IS used_is; 3434 if (i<n_ISForEdges) { 3435 used_is = ISForEdges[i]; 3436 } else { 3437 used_is = ISForFaces[i-n_ISForEdges]; 3438 } 3439 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3440 total_counts += j; 3441 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3442 } 3443 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3444 3445 /* get local part of global near null space vectors */ 3446 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3447 for (k=0;k<nnsp_size;k++) { 3448 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3449 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3450 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3451 } 3452 3453 /* whether or not to skip lapack calls */ 3454 skip_lapack = PETSC_TRUE; 3455 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3456 3457 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3458 if (!skip_lapack) { 3459 PetscScalar temp_work; 3460 3461 #if defined(PETSC_MISSING_LAPACK_GESVD) 3462 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3463 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3464 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3465 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3466 #if defined(PETSC_USE_COMPLEX) 3467 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3468 #endif 3469 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3470 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3471 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3472 lwork = -1; 3473 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3474 #if !defined(PETSC_USE_COMPLEX) 3475 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3476 #else 3477 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3478 #endif 3479 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3480 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3481 #else /* on missing GESVD */ 3482 /* SVD */ 3483 PetscInt max_n,min_n; 3484 max_n = max_size_of_constraint; 3485 min_n = max_constraints; 3486 if (max_size_of_constraint < max_constraints) { 3487 min_n = max_size_of_constraint; 3488 max_n = max_constraints; 3489 } 3490 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3491 #if defined(PETSC_USE_COMPLEX) 3492 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3493 #endif 3494 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3495 lwork = -1; 3496 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3497 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3500 #if !defined(PETSC_USE_COMPLEX) 3501 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3502 #else 3503 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3504 #endif 3505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3506 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3507 #endif /* on missing GESVD */ 3508 /* Allocate optimal workspace */ 3509 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3510 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3511 } 3512 /* Now we can loop on constraining sets */ 3513 total_counts = 0; 3514 constraints_idxs_ptr[0] = 0; 3515 constraints_data_ptr[0] = 0; 3516 /* vertices */ 3517 if (n_vertices) { 3518 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3519 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3520 for (i=0;i<n_vertices;i++) { 3521 constraints_n[total_counts] = 1; 3522 constraints_data[total_counts] = 1.0; 3523 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3524 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3525 total_counts++; 3526 } 3527 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3528 n_vertices = total_counts; 3529 } 3530 3531 /* edges and faces */ 3532 total_counts_cc = total_counts; 3533 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3534 IS used_is; 3535 PetscBool idxs_copied = PETSC_FALSE; 3536 3537 if (ncc<n_ISForEdges) { 3538 used_is = ISForEdges[ncc]; 3539 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3540 } else { 3541 used_is = ISForFaces[ncc-n_ISForEdges]; 3542 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3543 } 3544 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3545 3546 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3547 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3548 /* change of basis should not be performed on local periodic nodes */ 3549 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3550 if (nnsp_has_cnst) { 3551 PetscScalar quad_value; 3552 3553 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3554 idxs_copied = PETSC_TRUE; 3555 3556 if (!pcbddc->use_nnsp_true) { 3557 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3558 } else { 3559 quad_value = 1.0; 3560 } 3561 for (j=0;j<size_of_constraint;j++) { 3562 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3563 } 3564 temp_constraints++; 3565 total_counts++; 3566 } 3567 for (k=0;k<nnsp_size;k++) { 3568 PetscReal real_value; 3569 PetscScalar *ptr_to_data; 3570 3571 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3572 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3573 for (j=0;j<size_of_constraint;j++) { 3574 ptr_to_data[j] = array[is_indices[j]]; 3575 } 3576 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3577 /* check if array is null on the connected component */ 3578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3579 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3580 if (real_value > 0.0) { /* keep indices and values */ 3581 temp_constraints++; 3582 total_counts++; 3583 if (!idxs_copied) { 3584 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3585 idxs_copied = PETSC_TRUE; 3586 } 3587 } 3588 } 3589 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3590 valid_constraints = temp_constraints; 3591 if (!pcbddc->use_nnsp_true && temp_constraints) { 3592 if (temp_constraints == 1) { /* just normalize the constraint */ 3593 PetscScalar norm,*ptr_to_data; 3594 3595 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3596 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3597 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3598 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3599 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3600 } else { /* perform SVD */ 3601 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3602 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3603 3604 #if defined(PETSC_MISSING_LAPACK_GESVD) 3605 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3606 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3607 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3608 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3609 from that computed using LAPACKgesvd 3610 -> This is due to a different computation of eigenvectors in LAPACKheev 3611 -> The quality of the POD-computed basis will be the same */ 3612 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3613 /* Store upper triangular part of correlation matrix */ 3614 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3615 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3616 for (j=0;j<temp_constraints;j++) { 3617 for (k=0;k<j+1;k++) { 3618 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3619 } 3620 } 3621 /* compute eigenvalues and eigenvectors of correlation matrix */ 3622 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3623 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3624 #if !defined(PETSC_USE_COMPLEX) 3625 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3626 #else 3627 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3628 #endif 3629 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3630 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3631 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3632 j = 0; 3633 while (j < temp_constraints && singular_vals[j] < tol) j++; 3634 total_counts = total_counts-j; 3635 valid_constraints = temp_constraints-j; 3636 /* scale and copy POD basis into used quadrature memory */ 3637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3638 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3639 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3641 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3642 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3643 if (j<temp_constraints) { 3644 PetscInt ii; 3645 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3646 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3647 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3648 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3649 for (k=0;k<temp_constraints-j;k++) { 3650 for (ii=0;ii<size_of_constraint;ii++) { 3651 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3652 } 3653 } 3654 } 3655 #else /* on missing GESVD */ 3656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3657 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3658 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3659 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3660 #if !defined(PETSC_USE_COMPLEX) 3661 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3662 #else 3663 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3664 #endif 3665 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3666 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3667 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3668 k = temp_constraints; 3669 if (k > size_of_constraint) k = size_of_constraint; 3670 j = 0; 3671 while (j < k && singular_vals[k-j-1] < tol) j++; 3672 valid_constraints = k-j; 3673 total_counts = total_counts-temp_constraints+valid_constraints; 3674 #endif /* on missing GESVD */ 3675 } 3676 } 3677 /* update pointers information */ 3678 if (valid_constraints) { 3679 constraints_n[total_counts_cc] = valid_constraints; 3680 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3681 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3682 /* set change_of_basis flag */ 3683 if (boolforchange) { 3684 PetscBTSet(change_basis,total_counts_cc); 3685 } 3686 total_counts_cc++; 3687 } 3688 } 3689 /* free workspace */ 3690 if (!skip_lapack) { 3691 ierr = PetscFree(work);CHKERRQ(ierr); 3692 #if defined(PETSC_USE_COMPLEX) 3693 ierr = PetscFree(rwork);CHKERRQ(ierr); 3694 #endif 3695 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3696 #if defined(PETSC_MISSING_LAPACK_GESVD) 3697 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3698 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3699 #endif 3700 } 3701 for (k=0;k<nnsp_size;k++) { 3702 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3703 } 3704 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3705 /* free index sets of faces, edges and vertices */ 3706 for (i=0;i<n_ISForFaces;i++) { 3707 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3708 } 3709 if (n_ISForFaces) { 3710 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3711 } 3712 for (i=0;i<n_ISForEdges;i++) { 3713 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3714 } 3715 if (n_ISForEdges) { 3716 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3717 } 3718 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3719 } else { 3720 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3721 3722 total_counts = 0; 3723 n_vertices = 0; 3724 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3725 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3726 } 3727 max_constraints = 0; 3728 total_counts_cc = 0; 3729 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3730 total_counts += pcbddc->adaptive_constraints_n[i]; 3731 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3732 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3733 } 3734 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3735 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3736 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3737 constraints_data = pcbddc->adaptive_constraints_data; 3738 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3739 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3740 total_counts_cc = 0; 3741 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3742 if (pcbddc->adaptive_constraints_n[i]) { 3743 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3744 } 3745 } 3746 #if 0 3747 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3748 for (i=0;i<total_counts_cc;i++) { 3749 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3750 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3751 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3752 printf(" %d",constraints_idxs[j]); 3753 } 3754 printf("\n"); 3755 printf("number of cc: %d\n",constraints_n[i]); 3756 } 3757 for (i=0;i<n_vertices;i++) { 3758 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3759 } 3760 for (i=0;i<sub_schurs->n_subs;i++) { 3761 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3762 } 3763 #endif 3764 3765 max_size_of_constraint = 0; 3766 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3767 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3768 /* Change of basis */ 3769 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3770 if (pcbddc->use_change_of_basis) { 3771 for (i=0;i<sub_schurs->n_subs;i++) { 3772 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3773 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3774 } 3775 } 3776 } 3777 } 3778 pcbddc->local_primal_size = total_counts; 3779 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3780 3781 /* map constraints_idxs in boundary numbering */ 3782 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3783 if (i != constraints_idxs_ptr[total_counts_cc]) { 3784 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3785 } 3786 3787 /* Create constraint matrix */ 3788 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3789 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3790 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3791 3792 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3793 /* determine if a QR strategy is needed for change of basis */ 3794 qr_needed = PETSC_FALSE; 3795 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3796 total_primal_vertices=0; 3797 pcbddc->local_primal_size_cc = 0; 3798 for (i=0;i<total_counts_cc;i++) { 3799 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3800 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3801 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3802 pcbddc->local_primal_size_cc += 1; 3803 } else if (PetscBTLookup(change_basis,i)) { 3804 for (k=0;k<constraints_n[i];k++) { 3805 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3806 } 3807 pcbddc->local_primal_size_cc += constraints_n[i]; 3808 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3809 PetscBTSet(qr_needed_idx,i); 3810 qr_needed = PETSC_TRUE; 3811 } 3812 } else { 3813 pcbddc->local_primal_size_cc += 1; 3814 } 3815 } 3816 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3817 pcbddc->n_vertices = total_primal_vertices; 3818 /* permute indices in order to have a sorted set of vertices */ 3819 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3820 3821 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3822 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3823 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3824 3825 /* nonzero structure of constraint matrix */ 3826 /* and get reference dof for local constraints */ 3827 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3828 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3829 3830 j = total_primal_vertices; 3831 total_counts = total_primal_vertices; 3832 cum = total_primal_vertices; 3833 for (i=n_vertices;i<total_counts_cc;i++) { 3834 if (!PetscBTLookup(change_basis,i)) { 3835 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3836 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3837 cum++; 3838 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3839 for (k=0;k<constraints_n[i];k++) { 3840 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3841 nnz[j+k] = size_of_constraint; 3842 } 3843 j += constraints_n[i]; 3844 } 3845 } 3846 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3847 ierr = PetscFree(nnz);CHKERRQ(ierr); 3848 3849 /* set values in constraint matrix */ 3850 for (i=0;i<total_primal_vertices;i++) { 3851 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3852 } 3853 total_counts = total_primal_vertices; 3854 for (i=n_vertices;i<total_counts_cc;i++) { 3855 if (!PetscBTLookup(change_basis,i)) { 3856 PetscInt *cols; 3857 3858 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3859 cols = constraints_idxs+constraints_idxs_ptr[i]; 3860 for (k=0;k<constraints_n[i];k++) { 3861 PetscInt row = total_counts+k; 3862 PetscScalar *vals; 3863 3864 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3865 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3866 } 3867 total_counts += constraints_n[i]; 3868 } 3869 } 3870 /* assembling */ 3871 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 3874 /* 3875 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3876 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3877 */ 3878 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3879 if (pcbddc->use_change_of_basis) { 3880 /* dual and primal dofs on a single cc */ 3881 PetscInt dual_dofs,primal_dofs; 3882 /* working stuff for GEQRF */ 3883 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3884 PetscBLASInt lqr_work; 3885 /* working stuff for UNGQR */ 3886 PetscScalar *gqr_work,lgqr_work_t; 3887 PetscBLASInt lgqr_work; 3888 /* working stuff for TRTRS */ 3889 PetscScalar *trs_rhs; 3890 PetscBLASInt Blas_NRHS; 3891 /* pointers for values insertion into change of basis matrix */ 3892 PetscInt *start_rows,*start_cols; 3893 PetscScalar *start_vals; 3894 /* working stuff for values insertion */ 3895 PetscBT is_primal; 3896 PetscInt *aux_primal_numbering_B; 3897 /* matrix sizes */ 3898 PetscInt global_size,local_size; 3899 /* temporary change of basis */ 3900 Mat localChangeOfBasisMatrix; 3901 /* extra space for debugging */ 3902 PetscScalar *dbg_work; 3903 3904 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3905 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3906 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3907 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3908 /* nonzeros for local mat */ 3909 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3910 if (!pcbddc->benign_change || pcbddc->fake_change) { 3911 for (i=0;i<pcis->n;i++) nnz[i]=1; 3912 } else { 3913 const PetscInt *ii; 3914 PetscInt n; 3915 PetscBool flg_row; 3916 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3917 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3918 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3919 } 3920 for (i=n_vertices;i<total_counts_cc;i++) { 3921 if (PetscBTLookup(change_basis,i)) { 3922 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3923 if (PetscBTLookup(qr_needed_idx,i)) { 3924 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3925 } else { 3926 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3927 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3928 } 3929 } 3930 } 3931 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3932 ierr = PetscFree(nnz);CHKERRQ(ierr); 3933 /* Set interior change in the matrix */ 3934 if (!pcbddc->benign_change || pcbddc->fake_change) { 3935 for (i=0;i<pcis->n;i++) { 3936 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3937 } 3938 } else { 3939 const PetscInt *ii,*jj; 3940 PetscScalar *aa; 3941 PetscInt n; 3942 PetscBool flg_row; 3943 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3944 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3945 for (i=0;i<n;i++) { 3946 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3947 } 3948 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3949 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3950 } 3951 3952 if (pcbddc->dbg_flag) { 3953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3955 } 3956 3957 3958 /* Now we loop on the constraints which need a change of basis */ 3959 /* 3960 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3961 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3962 3963 Basic blocks of change of basis matrix T computed by 3964 3965 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3966 3967 | 1 0 ... 0 s_1/S | 3968 | 0 1 ... 0 s_2/S | 3969 | ... | 3970 | 0 ... 1 s_{n-1}/S | 3971 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3972 3973 with S = \sum_{i=1}^n s_i^2 3974 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3975 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3976 3977 - QR decomposition of constraints otherwise 3978 */ 3979 if (qr_needed) { 3980 /* space to store Q */ 3981 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3982 /* first we issue queries for optimal work */ 3983 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3984 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3985 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3986 lqr_work = -1; 3987 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3988 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3989 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3990 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3991 lgqr_work = -1; 3992 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3993 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3995 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3996 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3997 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3998 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3999 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4000 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4001 /* array to store scaling factors for reflectors */ 4002 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4003 /* array to store rhs and solution of triangular solver */ 4004 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4005 /* allocating workspace for check */ 4006 if (pcbddc->dbg_flag) { 4007 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4008 } 4009 } 4010 /* array to store whether a node is primal or not */ 4011 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4012 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4013 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4014 if (i != total_primal_vertices) { 4015 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 4016 } 4017 for (i=0;i<total_primal_vertices;i++) { 4018 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4019 } 4020 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4021 4022 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4023 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4024 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4025 if (PetscBTLookup(change_basis,total_counts)) { 4026 /* get constraint info */ 4027 primal_dofs = constraints_n[total_counts]; 4028 dual_dofs = size_of_constraint-primal_dofs; 4029 4030 if (pcbddc->dbg_flag) { 4031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4032 } 4033 4034 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4035 4036 /* copy quadrature constraints for change of basis check */ 4037 if (pcbddc->dbg_flag) { 4038 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4039 } 4040 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4041 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4042 4043 /* compute QR decomposition of constraints */ 4044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4045 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4047 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4048 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4049 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4050 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4051 4052 /* explictly compute R^-T */ 4053 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4054 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4055 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4056 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4059 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4060 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4061 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4062 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4063 4064 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4067 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4069 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4070 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4071 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4072 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4073 4074 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4075 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4076 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4078 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4079 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4084 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4086 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4087 4088 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4089 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4090 /* insert cols for primal dofs */ 4091 for (j=0;j<primal_dofs;j++) { 4092 start_vals = &qr_basis[j*size_of_constraint]; 4093 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4094 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4095 } 4096 /* insert cols for dual dofs */ 4097 for (j=0,k=0;j<dual_dofs;k++) { 4098 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4099 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4100 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4101 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4102 j++; 4103 } 4104 } 4105 4106 /* check change of basis */ 4107 if (pcbddc->dbg_flag) { 4108 PetscInt ii,jj; 4109 PetscBool valid_qr=PETSC_TRUE; 4110 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4115 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4117 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4118 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4119 for (jj=0;jj<size_of_constraint;jj++) { 4120 for (ii=0;ii<primal_dofs;ii++) { 4121 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4122 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4123 } 4124 } 4125 if (!valid_qr) { 4126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4127 for (jj=0;jj<size_of_constraint;jj++) { 4128 for (ii=0;ii<primal_dofs;ii++) { 4129 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4130 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4131 } 4132 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4133 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4134 } 4135 } 4136 } 4137 } else { 4138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4139 } 4140 } 4141 } else { /* simple transformation block */ 4142 PetscInt row,col; 4143 PetscScalar val,norm; 4144 4145 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4146 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4147 for (j=0;j<size_of_constraint;j++) { 4148 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4149 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4150 if (!PetscBTLookup(is_primal,row_B)) { 4151 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4152 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4153 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4154 } else { 4155 for (k=0;k<size_of_constraint;k++) { 4156 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4157 if (row != col) { 4158 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4159 } else { 4160 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4161 } 4162 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4163 } 4164 } 4165 } 4166 if (pcbddc->dbg_flag) { 4167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4168 } 4169 } 4170 } else { 4171 if (pcbddc->dbg_flag) { 4172 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4173 } 4174 } 4175 } 4176 4177 /* free workspace */ 4178 if (qr_needed) { 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4181 } 4182 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4183 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4184 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4185 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4186 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4187 } 4188 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4189 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4190 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4191 4192 /* assembling of global change of variable */ 4193 if (!pcbddc->fake_change) { 4194 Mat tmat; 4195 PetscInt bs; 4196 4197 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4198 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4199 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4200 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4201 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4202 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4203 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4204 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4205 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4206 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4207 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4208 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4209 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4210 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4211 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4212 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4213 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4214 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4215 4216 /* check */ 4217 if (pcbddc->dbg_flag) { 4218 PetscReal error; 4219 Vec x,x_change; 4220 4221 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4222 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4223 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4224 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4225 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4226 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4227 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4228 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4229 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4230 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4231 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4232 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4235 ierr = VecDestroy(&x);CHKERRQ(ierr); 4236 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4237 } 4238 /* adapt sub_schurs computed (if any) */ 4239 if (pcbddc->use_deluxe_scaling) { 4240 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4241 4242 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4243 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4244 } 4245 if (sub_schurs->S_Ej_all) { 4246 Mat S_new,tmat; 4247 IS is_all_N,is_V_Sall = NULL; 4248 4249 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4250 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4251 if (pcbddc->deluxe_zerorows) { 4252 ISLocalToGlobalMapping NtoSall; 4253 IS is_V; 4254 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4255 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4256 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4257 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4258 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4259 } 4260 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4261 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4262 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4263 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4264 if (pcbddc->deluxe_zerorows) { 4265 const PetscScalar *array; 4266 const PetscInt *idxs_V,*idxs_all; 4267 PetscInt i,n_V; 4268 4269 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4270 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4271 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4272 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4273 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4274 for (i=0;i<n_V;i++) { 4275 PetscScalar val; 4276 PetscInt idx; 4277 4278 idx = idxs_V[i]; 4279 val = array[idxs_all[idxs_V[i]]]; 4280 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4281 } 4282 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4283 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4284 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4285 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4286 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4287 } 4288 sub_schurs->S_Ej_all = S_new; 4289 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4290 if (sub_schurs->sum_S_Ej_all) { 4291 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4292 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4293 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4294 if (pcbddc->deluxe_zerorows) { 4295 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4296 } 4297 sub_schurs->sum_S_Ej_all = S_new; 4298 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4299 } 4300 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4301 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4302 } 4303 /* destroy any change of basis context in sub_schurs */ 4304 if (sub_schurs->change) { 4305 PetscInt i; 4306 4307 for (i=0;i<sub_schurs->n_subs;i++) { 4308 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4309 } 4310 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4311 } 4312 } 4313 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4314 /* determine if any process has changed the pressures locally */ 4315 if (pcbddc->benign_saddle_point) { 4316 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4317 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4318 } 4319 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4320 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4321 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4322 pcbddc->use_qr_single = qr_needed; 4323 } 4324 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4325 PetscBool needglobal = PETSC_FALSE; 4326 if (pcbddc->benign_saddle_point) { 4327 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4328 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4329 } 4330 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4331 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4332 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4333 } else { 4334 Mat benign_global = NULL; 4335 if (needglobal) { 4336 Mat tmat; 4337 4338 pcbddc->change_interior = PETSC_TRUE; 4339 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4340 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4341 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4342 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4343 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4344 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4345 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4346 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4347 if (pcbddc->benign_change) { 4348 Mat M; 4349 4350 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4351 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4352 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4353 ierr = MatDestroy(&M);CHKERRQ(ierr); 4354 } else { 4355 Mat eye; 4356 PetscScalar *array; 4357 4358 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4359 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4360 for (i=0;i<pcis->n;i++) { 4361 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4362 } 4363 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4364 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4365 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4367 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4368 } 4369 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4370 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4371 } 4372 if (pcbddc->user_ChangeOfBasisMatrix) { 4373 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4374 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4375 } else if (needglobal) { 4376 pcbddc->ChangeOfBasisMatrix = benign_global; 4377 } 4378 } 4379 } 4380 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4381 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4382 } 4383 4384 if (!pcbddc->fake_change) { 4385 /* add pressure dofs to set of primal nodes for numbering purposes */ 4386 for (i=0;i<pcbddc->benign_n;i++) { 4387 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4388 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4389 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4390 pcbddc->local_primal_size_cc++; 4391 pcbddc->local_primal_size++; 4392 } 4393 4394 /* check if a new primal space has been introduced (also take into account benign trick) */ 4395 pcbddc->new_primal_space_local = PETSC_TRUE; 4396 if (olocal_primal_size == pcbddc->local_primal_size) { 4397 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4398 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4399 if (!pcbddc->new_primal_space_local) { 4400 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4401 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4402 } 4403 } 4404 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4405 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4406 } 4407 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4408 4409 /* flush dbg viewer */ 4410 if (pcbddc->dbg_flag) { 4411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4412 } 4413 4414 /* free workspace */ 4415 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4416 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4417 if (!pcbddc->adaptive_selection) { 4418 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4419 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4420 } else { 4421 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4422 pcbddc->adaptive_constraints_idxs_ptr, 4423 pcbddc->adaptive_constraints_data_ptr, 4424 pcbddc->adaptive_constraints_idxs, 4425 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4426 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4427 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4428 } 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4434 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4435 { 4436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4437 PC_IS *pcis = (PC_IS*)pc->data; 4438 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4439 PetscInt ierr,i,N; 4440 PetscViewer viewer=pcbddc->dbg_viewer; 4441 4442 PetscFunctionBegin; 4443 /* Reset previously computed graph */ 4444 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4445 /* Init local Graph struct */ 4446 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4447 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4448 4449 /* Check validity of the csr graph passed in by the user */ 4450 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4451 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4452 } 4453 4454 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4455 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4456 PetscInt *xadj,*adjncy; 4457 PetscInt nvtxs; 4458 PetscBool flg_row=PETSC_FALSE; 4459 4460 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4461 if (flg_row) { 4462 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4463 pcbddc->computed_rowadj = PETSC_TRUE; 4464 } 4465 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4466 } 4467 if (pcbddc->dbg_flag) { 4468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4469 } 4470 4471 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4472 if (pcbddc->user_provided_isfordofs) { 4473 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4474 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4475 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4476 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4477 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4478 } 4479 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4480 pcbddc->n_ISForDofs = 0; 4481 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4482 } 4483 } else { 4484 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4485 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4486 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4487 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4488 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4489 } 4490 } 4491 } 4492 4493 /* Setup of Graph */ 4494 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4495 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4496 } 4497 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4498 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4499 } 4500 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4501 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4502 } 4503 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4504 4505 /* attach info on disconnected subdomains if present */ 4506 if (pcbddc->n_local_subs) { 4507 PetscInt *local_subs; 4508 4509 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4510 for (i=0;i<pcbddc->n_local_subs;i++) { 4511 const PetscInt *idxs; 4512 PetscInt nl,j; 4513 4514 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4515 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4516 for (j=0;j<nl;j++) { 4517 local_subs[idxs[j]] = i; 4518 } 4519 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4520 } 4521 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4522 pcbddc->mat_graph->local_subs = local_subs; 4523 } 4524 4525 /* Graph's connected components analysis */ 4526 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4527 4528 /* print some info to stdout */ 4529 if (pcbddc->dbg_flag) { 4530 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4531 } 4532 PetscFunctionReturn(0); 4533 } 4534 4535 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4536 #undef __FUNCT__ 4537 #define __FUNCT__ "PCBDDCSubsetNumbering" 4538 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4539 { 4540 PetscSF sf; 4541 PetscLayout map; 4542 const PetscInt *idxs; 4543 PetscInt *leaf_data,*root_data,*gidxs; 4544 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4545 PetscInt n_n,nlocals,start,first_index; 4546 PetscMPIInt commsize; 4547 PetscBool first_found; 4548 PetscErrorCode ierr; 4549 4550 PetscFunctionBegin; 4551 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4552 if (subset_mult) { 4553 PetscCheckSameComm(subset,1,subset_mult,2); 4554 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4555 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4556 } 4557 /* create workspace layout for computing global indices of subset */ 4558 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4559 lbounds[0] = lbounds[1] = 0; 4560 for (i=0;i<n;i++) { 4561 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4562 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4563 } 4564 lbounds[0] = -lbounds[0]; 4565 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4566 gbounds[0] = -gbounds[0]; 4567 N = gbounds[1] - gbounds[0] + 1; 4568 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4569 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4570 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4571 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4572 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4573 4574 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4575 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4576 if (subset_mult) { 4577 const PetscInt* idxs_mult; 4578 4579 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4580 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4581 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4582 } else { 4583 for (i=0;i<n;i++) leaf_data[i] = 1; 4584 } 4585 /* local size of new subset */ 4586 n_n = 0; 4587 for (i=0;i<n;i++) n_n += leaf_data[i]; 4588 4589 /* global indexes in layout */ 4590 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4591 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4592 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4594 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4595 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4596 4597 /* reduce from leaves to roots */ 4598 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4599 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4600 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4601 4602 /* count indexes in local part of layout */ 4603 nlocals = 0; 4604 first_index = -1; 4605 first_found = PETSC_FALSE; 4606 for (i=0;i<Nl;i++) { 4607 if (!first_found && root_data[i]) { 4608 first_found = PETSC_TRUE; 4609 first_index = i; 4610 } 4611 nlocals += root_data[i]; 4612 } 4613 4614 /* cumulative of number of indexes and size of subset without holes */ 4615 #if defined(PETSC_HAVE_MPI_EXSCAN) 4616 start = 0; 4617 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4618 #else 4619 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4620 start = start-nlocals; 4621 #endif 4622 4623 if (N_n) { /* compute total size of new subset if requested */ 4624 *N_n = start + nlocals; 4625 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4626 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4627 } 4628 4629 /* adapt root data with cumulative */ 4630 if (first_found) { 4631 PetscInt old_index; 4632 4633 root_data[first_index] += start; 4634 old_index = first_index; 4635 for (i=first_index+1;i<Nl;i++) { 4636 if (root_data[i]) { 4637 root_data[i] += root_data[old_index]; 4638 old_index = i; 4639 } 4640 } 4641 } 4642 4643 /* from roots to leaves */ 4644 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4645 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4646 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4647 4648 /* create new IS with global indexes without holes */ 4649 if (subset_mult) { 4650 const PetscInt* idxs_mult; 4651 PetscInt cum; 4652 4653 cum = 0; 4654 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4655 for (i=0;i<n;i++) { 4656 PetscInt j; 4657 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4658 } 4659 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4660 } else { 4661 for (i=0;i<n;i++) { 4662 gidxs[i] = leaf_data[i]-1; 4663 } 4664 } 4665 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4666 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4667 PetscFunctionReturn(0); 4668 } 4669 4670 #undef __FUNCT__ 4671 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4672 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4673 { 4674 PetscInt i,j; 4675 PetscScalar *alphas; 4676 PetscErrorCode ierr; 4677 4678 PetscFunctionBegin; 4679 /* this implements stabilized Gram-Schmidt */ 4680 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4681 for (i=0;i<n;i++) { 4682 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4683 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4684 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4685 } 4686 ierr = PetscFree(alphas);CHKERRQ(ierr); 4687 PetscFunctionReturn(0); 4688 } 4689 4690 #undef __FUNCT__ 4691 #define __FUNCT__ "MatISGetSubassemblingPattern" 4692 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4693 { 4694 IS ranks_send_to; 4695 PetscInt n_neighs,*neighs,*n_shared,**shared; 4696 PetscMPIInt size,rank,color; 4697 PetscInt *xadj,*adjncy; 4698 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4699 PetscInt i,local_size,threshold=0; 4700 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4701 PetscSubcomm subcomm; 4702 PetscErrorCode ierr; 4703 4704 PetscFunctionBegin; 4705 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4706 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4707 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4708 4709 /* Get info on mapping */ 4710 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4711 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4712 4713 /* build local CSR graph of subdomains' connectivity */ 4714 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4715 xadj[0] = 0; 4716 xadj[1] = PetscMax(n_neighs-1,0); 4717 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4718 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4719 4720 if (threshold) { 4721 PetscInt xadj_count = 0; 4722 for (i=1;i<n_neighs;i++) { 4723 if (n_shared[i] > threshold) { 4724 adjncy[xadj_count] = neighs[i]; 4725 adjncy_wgt[xadj_count] = n_shared[i]; 4726 xadj_count++; 4727 } 4728 } 4729 xadj[1] = xadj_count; 4730 } else { 4731 if (xadj[1]) { 4732 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4733 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4734 } 4735 } 4736 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4737 if (use_square) { 4738 for (i=0;i<xadj[1];i++) { 4739 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4740 } 4741 } 4742 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4743 4744 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4745 4746 /* 4747 Restrict work on active processes only. 4748 */ 4749 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4750 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4751 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4752 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4753 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4754 if (color) { 4755 ierr = PetscFree(xadj);CHKERRQ(ierr); 4756 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4757 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4758 } else { 4759 Mat subdomain_adj; 4760 IS new_ranks,new_ranks_contig; 4761 MatPartitioning partitioner; 4762 PetscInt prank,rstart=0,rend=0; 4763 PetscInt *is_indices,*oldranks; 4764 PetscBool aggregate; 4765 4766 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4767 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4768 prank = rank; 4769 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4770 /* 4771 for (i=0;i<size;i++) { 4772 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4773 } 4774 */ 4775 for (i=0;i<xadj[1];i++) { 4776 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4777 } 4778 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4779 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4780 if (aggregate) { 4781 PetscInt lrows,row,ncols,*cols; 4782 PetscMPIInt nrank; 4783 PetscScalar *vals; 4784 4785 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4786 lrows = 0; 4787 if (nrank<redprocs) { 4788 lrows = size/redprocs; 4789 if (nrank<size%redprocs) lrows++; 4790 } 4791 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4792 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4793 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4794 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4795 row = nrank; 4796 ncols = xadj[1]-xadj[0]; 4797 cols = adjncy; 4798 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4799 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4800 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4801 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4802 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4803 ierr = PetscFree(xadj);CHKERRQ(ierr); 4804 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4805 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4806 ierr = PetscFree(vals);CHKERRQ(ierr); 4807 } else { 4808 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4809 } 4810 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4811 4812 /* Partition */ 4813 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4814 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4815 if (use_vwgt) { 4816 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4817 v_wgt[0] = local_size; 4818 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4819 } 4820 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4821 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4822 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4823 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4824 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4825 4826 /* renumber new_ranks to avoid "holes" in new set of processors */ 4827 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4828 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4829 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4830 if (!redprocs) { 4831 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4832 } else { 4833 PetscInt idxs[1]; 4834 PetscMPIInt tag; 4835 MPI_Request *reqs; 4836 4837 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4838 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4839 for (i=rstart;i<rend;i++) { 4840 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4841 } 4842 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4843 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4844 ierr = PetscFree(reqs);CHKERRQ(ierr); 4845 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4846 } 4847 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4848 /* clean up */ 4849 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4850 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4851 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4852 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4853 } 4854 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4855 4856 /* assemble parallel IS for sends */ 4857 i = 1; 4858 if (color) i=0; 4859 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4860 /* get back IS */ 4861 *is_sends = ranks_send_to; 4862 PetscFunctionReturn(0); 4863 } 4864 4865 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4866 4867 #undef __FUNCT__ 4868 #define __FUNCT__ "MatISSubassemble" 4869 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4870 { 4871 Mat local_mat; 4872 IS is_sends_internal; 4873 PetscInt rows,cols,new_local_rows; 4874 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4875 PetscBool ismatis,isdense,newisdense,destroy_mat; 4876 ISLocalToGlobalMapping l2gmap; 4877 PetscInt* l2gmap_indices; 4878 const PetscInt* is_indices; 4879 MatType new_local_type; 4880 /* buffers */ 4881 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4882 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4883 PetscInt *recv_buffer_idxs_local; 4884 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4885 /* MPI */ 4886 MPI_Comm comm,comm_n; 4887 PetscSubcomm subcomm; 4888 PetscMPIInt n_sends,n_recvs,commsize; 4889 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4890 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4891 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4892 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4893 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4894 PetscErrorCode ierr; 4895 4896 PetscFunctionBegin; 4897 /* TODO: add missing checks */ 4898 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4899 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4900 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4901 PetscValidLogicalCollectiveInt(mat,nis,7); 4902 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4903 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4904 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4905 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4906 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4907 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4908 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4909 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4910 PetscInt mrows,mcols,mnrows,mncols; 4911 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4912 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4913 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4914 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4915 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4916 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4917 } 4918 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4919 PetscValidLogicalCollectiveInt(mat,bs,0); 4920 /* prepare IS for sending if not provided */ 4921 if (!is_sends) { 4922 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4923 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4924 } else { 4925 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4926 is_sends_internal = is_sends; 4927 } 4928 4929 /* get comm */ 4930 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4931 4932 /* compute number of sends */ 4933 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4934 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4935 4936 /* compute number of receives */ 4937 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4938 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4939 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4940 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4941 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4942 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4943 ierr = PetscFree(iflags);CHKERRQ(ierr); 4944 4945 /* restrict comm if requested */ 4946 subcomm = 0; 4947 destroy_mat = PETSC_FALSE; 4948 if (restrict_comm) { 4949 PetscMPIInt color,subcommsize; 4950 4951 color = 0; 4952 if (restrict_full) { 4953 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4954 } else { 4955 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4956 } 4957 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4958 subcommsize = commsize - subcommsize; 4959 /* check if reuse has been requested */ 4960 if (reuse == MAT_REUSE_MATRIX) { 4961 if (*mat_n) { 4962 PetscMPIInt subcommsize2; 4963 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4964 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4965 comm_n = PetscObjectComm((PetscObject)*mat_n); 4966 } else { 4967 comm_n = PETSC_COMM_SELF; 4968 } 4969 } else { /* MAT_INITIAL_MATRIX */ 4970 PetscMPIInt rank; 4971 4972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4973 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4974 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4975 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4976 comm_n = PetscSubcommChild(subcomm); 4977 } 4978 /* flag to destroy *mat_n if not significative */ 4979 if (color) destroy_mat = PETSC_TRUE; 4980 } else { 4981 comm_n = comm; 4982 } 4983 4984 /* prepare send/receive buffers */ 4985 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4986 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4987 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4988 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4989 if (nis) { 4990 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4991 } 4992 4993 /* Get data from local matrices */ 4994 if (!isdense) { 4995 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4996 /* TODO: See below some guidelines on how to prepare the local buffers */ 4997 /* 4998 send_buffer_vals should contain the raw values of the local matrix 4999 send_buffer_idxs should contain: 5000 - MatType_PRIVATE type 5001 - PetscInt size_of_l2gmap 5002 - PetscInt global_row_indices[size_of_l2gmap] 5003 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5004 */ 5005 } else { 5006 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5007 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5008 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5009 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5010 send_buffer_idxs[1] = i; 5011 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5012 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5013 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5014 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5015 for (i=0;i<n_sends;i++) { 5016 ilengths_vals[is_indices[i]] = len*len; 5017 ilengths_idxs[is_indices[i]] = len+2; 5018 } 5019 } 5020 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5021 /* additional is (if any) */ 5022 if (nis) { 5023 PetscMPIInt psum; 5024 PetscInt j; 5025 for (j=0,psum=0;j<nis;j++) { 5026 PetscInt plen; 5027 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5028 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5029 psum += len+1; /* indices + lenght */ 5030 } 5031 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5032 for (j=0,psum=0;j<nis;j++) { 5033 PetscInt plen; 5034 const PetscInt *is_array_idxs; 5035 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5036 send_buffer_idxs_is[psum] = plen; 5037 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5038 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5039 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5040 psum += plen+1; /* indices + lenght */ 5041 } 5042 for (i=0;i<n_sends;i++) { 5043 ilengths_idxs_is[is_indices[i]] = psum; 5044 } 5045 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5046 } 5047 5048 buf_size_idxs = 0; 5049 buf_size_vals = 0; 5050 buf_size_idxs_is = 0; 5051 for (i=0;i<n_recvs;i++) { 5052 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5053 buf_size_vals += (PetscInt)olengths_vals[i]; 5054 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5055 } 5056 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5057 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5058 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5059 5060 /* get new tags for clean communications */ 5061 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5062 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5063 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5064 5065 /* allocate for requests */ 5066 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5067 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5068 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5069 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5070 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5071 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5072 5073 /* communications */ 5074 ptr_idxs = recv_buffer_idxs; 5075 ptr_vals = recv_buffer_vals; 5076 ptr_idxs_is = recv_buffer_idxs_is; 5077 for (i=0;i<n_recvs;i++) { 5078 source_dest = onodes[i]; 5079 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5080 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5081 ptr_idxs += olengths_idxs[i]; 5082 ptr_vals += olengths_vals[i]; 5083 if (nis) { 5084 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5085 ptr_idxs_is += olengths_idxs_is[i]; 5086 } 5087 } 5088 for (i=0;i<n_sends;i++) { 5089 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5090 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5091 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5092 if (nis) { 5093 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5094 } 5095 } 5096 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5097 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5098 5099 /* assemble new l2g map */ 5100 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5101 ptr_idxs = recv_buffer_idxs; 5102 new_local_rows = 0; 5103 for (i=0;i<n_recvs;i++) { 5104 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5105 ptr_idxs += olengths_idxs[i]; 5106 } 5107 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5108 ptr_idxs = recv_buffer_idxs; 5109 new_local_rows = 0; 5110 for (i=0;i<n_recvs;i++) { 5111 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5112 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5113 ptr_idxs += olengths_idxs[i]; 5114 } 5115 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5116 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5117 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5118 5119 /* infer new local matrix type from received local matrices type */ 5120 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5121 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5122 if (n_recvs) { 5123 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5124 ptr_idxs = recv_buffer_idxs; 5125 for (i=0;i<n_recvs;i++) { 5126 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5127 new_local_type_private = MATAIJ_PRIVATE; 5128 break; 5129 } 5130 ptr_idxs += olengths_idxs[i]; 5131 } 5132 switch (new_local_type_private) { 5133 case MATDENSE_PRIVATE: 5134 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5135 new_local_type = MATSEQAIJ; 5136 bs = 1; 5137 } else { /* if I receive only 1 dense matrix */ 5138 new_local_type = MATSEQDENSE; 5139 bs = 1; 5140 } 5141 break; 5142 case MATAIJ_PRIVATE: 5143 new_local_type = MATSEQAIJ; 5144 bs = 1; 5145 break; 5146 case MATBAIJ_PRIVATE: 5147 new_local_type = MATSEQBAIJ; 5148 break; 5149 case MATSBAIJ_PRIVATE: 5150 new_local_type = MATSEQSBAIJ; 5151 break; 5152 default: 5153 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5154 break; 5155 } 5156 } else { /* by default, new_local_type is seqdense */ 5157 new_local_type = MATSEQDENSE; 5158 bs = 1; 5159 } 5160 5161 /* create MATIS object if needed */ 5162 if (reuse == MAT_INITIAL_MATRIX) { 5163 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5164 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5165 } else { 5166 /* it also destroys the local matrices */ 5167 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5168 } 5169 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5170 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5171 5172 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5173 5174 /* Global to local map of received indices */ 5175 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5176 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5177 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5178 5179 /* restore attributes -> type of incoming data and its size */ 5180 buf_size_idxs = 0; 5181 for (i=0;i<n_recvs;i++) { 5182 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5183 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5184 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5185 } 5186 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5187 5188 /* set preallocation */ 5189 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5190 if (!newisdense) { 5191 PetscInt *new_local_nnz=0; 5192 5193 ptr_vals = recv_buffer_vals; 5194 ptr_idxs = recv_buffer_idxs_local; 5195 if (n_recvs) { 5196 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5197 } 5198 for (i=0;i<n_recvs;i++) { 5199 PetscInt j; 5200 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5201 for (j=0;j<*(ptr_idxs+1);j++) { 5202 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5203 } 5204 } else { 5205 /* TODO */ 5206 } 5207 ptr_idxs += olengths_idxs[i]; 5208 } 5209 if (new_local_nnz) { 5210 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5211 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5212 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5213 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5214 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5215 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5216 } else { 5217 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5218 } 5219 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5220 } else { 5221 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5222 } 5223 5224 /* set values */ 5225 ptr_vals = recv_buffer_vals; 5226 ptr_idxs = recv_buffer_idxs_local; 5227 for (i=0;i<n_recvs;i++) { 5228 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5229 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5230 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5231 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5232 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5233 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5234 } else { 5235 /* TODO */ 5236 } 5237 ptr_idxs += olengths_idxs[i]; 5238 ptr_vals += olengths_vals[i]; 5239 } 5240 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5241 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5242 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5243 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5244 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5245 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5246 5247 #if 0 5248 if (!restrict_comm) { /* check */ 5249 Vec lvec,rvec; 5250 PetscReal infty_error; 5251 5252 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5253 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5254 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5255 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5256 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5257 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5258 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5259 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5260 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5261 } 5262 #endif 5263 5264 /* assemble new additional is (if any) */ 5265 if (nis) { 5266 PetscInt **temp_idxs,*count_is,j,psum; 5267 5268 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5269 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5270 ptr_idxs = recv_buffer_idxs_is; 5271 psum = 0; 5272 for (i=0;i<n_recvs;i++) { 5273 for (j=0;j<nis;j++) { 5274 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5275 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5276 psum += plen; 5277 ptr_idxs += plen+1; /* shift pointer to received data */ 5278 } 5279 } 5280 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5281 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5282 for (i=1;i<nis;i++) { 5283 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5284 } 5285 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5286 ptr_idxs = recv_buffer_idxs_is; 5287 for (i=0;i<n_recvs;i++) { 5288 for (j=0;j<nis;j++) { 5289 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5290 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5291 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5292 ptr_idxs += plen+1; /* shift pointer to received data */ 5293 } 5294 } 5295 for (i=0;i<nis;i++) { 5296 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5297 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5298 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5299 } 5300 ierr = PetscFree(count_is);CHKERRQ(ierr); 5301 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5302 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5303 } 5304 /* free workspace */ 5305 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5306 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5307 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5308 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5309 if (isdense) { 5310 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5311 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5312 } else { 5313 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5314 } 5315 if (nis) { 5316 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5317 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5318 } 5319 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5320 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5321 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5322 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5323 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5324 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5325 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5326 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5327 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5328 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5329 ierr = PetscFree(onodes);CHKERRQ(ierr); 5330 if (nis) { 5331 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5332 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5333 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5334 } 5335 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5336 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5337 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5338 for (i=0;i<nis;i++) { 5339 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5340 } 5341 *mat_n = NULL; 5342 } 5343 PetscFunctionReturn(0); 5344 } 5345 5346 /* temporary hack into ksp private data structure */ 5347 #include <petsc/private/kspimpl.h> 5348 5349 #undef __FUNCT__ 5350 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5351 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5352 { 5353 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5354 PC_IS *pcis = (PC_IS*)pc->data; 5355 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5356 MatNullSpace CoarseNullSpace=NULL; 5357 ISLocalToGlobalMapping coarse_islg; 5358 IS coarse_is,*isarray; 5359 PetscInt i,im_active=-1,active_procs=-1; 5360 PetscInt nis,nisdofs,nisneu,nisvert; 5361 PC pc_temp; 5362 PCType coarse_pc_type; 5363 KSPType coarse_ksp_type; 5364 PetscBool multilevel_requested,multilevel_allowed; 5365 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5366 Mat t_coarse_mat_is; 5367 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5368 PetscMPIInt all_procs; 5369 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5370 PetscBool compute_vecs = PETSC_FALSE; 5371 PetscScalar *array; 5372 PetscErrorCode ierr; 5373 5374 PetscFunctionBegin; 5375 /* Assign global numbering to coarse dofs */ 5376 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5377 PetscInt ocoarse_size; 5378 compute_vecs = PETSC_TRUE; 5379 ocoarse_size = pcbddc->coarse_size; 5380 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5381 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5382 /* see if we can avoid some work */ 5383 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5384 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5385 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5386 PC pc; 5387 PetscBool isbddc; 5388 5389 /* temporary workaround since PCBDDC does not have a reset method so far */ 5390 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5391 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5392 if (isbddc) { 5393 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5394 } else { 5395 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5396 } 5397 coarse_reuse = PETSC_FALSE; 5398 } else { /* we can safely reuse already computed coarse matrix */ 5399 coarse_reuse = PETSC_TRUE; 5400 } 5401 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5402 coarse_reuse = PETSC_FALSE; 5403 } 5404 /* reset any subassembling information */ 5405 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5406 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5407 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5408 coarse_reuse = PETSC_TRUE; 5409 } 5410 5411 /* count "active" (i.e. with positive local size) and "void" processes */ 5412 im_active = !!(pcis->n); 5413 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5414 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5415 void_procs = all_procs-active_procs; 5416 csin_type_simple = PETSC_TRUE; 5417 redist = PETSC_FALSE; 5418 if (pcbddc->current_level && void_procs) { 5419 csin_ml = PETSC_TRUE; 5420 ncoarse_ml = void_procs; 5421 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5422 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5423 csin_ds = PETSC_TRUE; 5424 ncoarse_ds = pcbddc->redistribute_coarse; 5425 redist = PETSC_TRUE; 5426 } else { 5427 csin_ds = PETSC_TRUE; 5428 ncoarse_ds = active_procs; 5429 redist = PETSC_TRUE; 5430 } 5431 } else { 5432 csin_ml = PETSC_FALSE; 5433 ncoarse_ml = all_procs; 5434 if (void_procs) { 5435 csin_ds = PETSC_TRUE; 5436 ncoarse_ds = void_procs; 5437 csin_type_simple = PETSC_FALSE; 5438 } else { 5439 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5440 csin_ds = PETSC_TRUE; 5441 ncoarse_ds = pcbddc->redistribute_coarse; 5442 redist = PETSC_TRUE; 5443 } else { 5444 csin_ds = PETSC_FALSE; 5445 ncoarse_ds = all_procs; 5446 } 5447 } 5448 } 5449 5450 /* 5451 test if we can go multilevel: three conditions must be satisfied: 5452 - we have not exceeded the number of levels requested 5453 - we can actually subassemble the active processes 5454 - we can find a suitable number of MPI processes where we can place the subassembled problem 5455 */ 5456 multilevel_allowed = PETSC_FALSE; 5457 multilevel_requested = PETSC_FALSE; 5458 if (pcbddc->current_level < pcbddc->max_levels) { 5459 multilevel_requested = PETSC_TRUE; 5460 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5461 multilevel_allowed = PETSC_FALSE; 5462 } else { 5463 multilevel_allowed = PETSC_TRUE; 5464 } 5465 } 5466 /* determine number of process partecipating to coarse solver */ 5467 if (multilevel_allowed) { 5468 ncoarse = ncoarse_ml; 5469 csin = csin_ml; 5470 redist = PETSC_FALSE; 5471 } else { 5472 ncoarse = ncoarse_ds; 5473 csin = csin_ds; 5474 } 5475 5476 /* creates temporary l2gmap and IS for coarse indexes */ 5477 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5478 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5479 5480 /* creates temporary MATIS object for coarse matrix */ 5481 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5482 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5483 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5484 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5485 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5486 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5487 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5488 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5490 5491 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5492 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5493 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5494 const PetscInt *idxs; 5495 ISLocalToGlobalMapping tmap; 5496 5497 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5498 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5499 /* allocate space for temporary storage */ 5500 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5501 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5502 /* allocate for IS array */ 5503 nisdofs = pcbddc->n_ISForDofsLocal; 5504 nisneu = !!pcbddc->NeumannBoundariesLocal; 5505 nisvert = 0; 5506 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5507 nisvert = 1; 5508 } 5509 nis = nisdofs + nisneu + nisvert; 5510 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5511 /* dofs splitting */ 5512 for (i=0;i<nisdofs;i++) { 5513 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5514 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5515 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5516 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5517 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5518 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5519 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5520 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5521 } 5522 /* neumann boundaries */ 5523 if (pcbddc->NeumannBoundariesLocal) { 5524 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5525 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5526 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5527 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5528 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5529 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5530 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5531 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5532 } 5533 /* primal vertices (benign) */ 5534 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5535 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5536 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5537 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5538 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5539 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5540 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5541 } 5542 /* free memory */ 5543 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5544 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5545 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5546 } else { 5547 nis = 0; 5548 nisdofs = 0; 5549 nisneu = 0; 5550 nisvert = 0; 5551 isarray = NULL; 5552 } 5553 /* destroy no longer needed map */ 5554 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5555 5556 /* restrict on coarse candidates (if needed) */ 5557 coarse_mat_is = NULL; 5558 if (csin) { 5559 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5560 if (redist) { 5561 PetscMPIInt rank; 5562 PetscInt spc,n_spc_p1,dest[1],destsize; 5563 5564 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5565 spc = active_procs/ncoarse; 5566 n_spc_p1 = active_procs%ncoarse; 5567 if (im_active) { 5568 destsize = 1; 5569 if (rank > n_spc_p1*(spc+1)-1) { 5570 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5571 } else { 5572 dest[0] = rank/(spc+1); 5573 } 5574 } else { 5575 destsize = 0; 5576 } 5577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5578 } else if (csin_type_simple) { 5579 PetscMPIInt rank; 5580 PetscInt issize,isidx; 5581 5582 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5583 if (im_active) { 5584 issize = 1; 5585 isidx = (PetscInt)rank; 5586 } else { 5587 issize = 0; 5588 isidx = -1; 5589 } 5590 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5591 } else { /* get a suitable subassembling pattern from MATIS code */ 5592 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5593 } 5594 5595 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5596 if (!redist || ncoarse <= void_procs) { 5597 PetscInt ncoarse_cand,tissize,*nisindices; 5598 PetscInt *coarse_candidates; 5599 const PetscInt* tisindices; 5600 5601 /* get coarse candidates' ranks in pc communicator */ 5602 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5603 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5604 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5605 if (!coarse_candidates[i]) { 5606 coarse_candidates[ncoarse_cand++]=i; 5607 } 5608 } 5609 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5610 5611 5612 if (pcbddc->dbg_flag) { 5613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5614 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5615 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5616 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5617 for (i=0;i<ncoarse_cand;i++) { 5618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5619 } 5620 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5621 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5622 } 5623 /* shift the pattern on coarse candidates */ 5624 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5625 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5626 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5627 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5628 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5629 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5630 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5631 } 5632 if (pcbddc->dbg_flag) { 5633 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5635 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5637 } 5638 } 5639 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5640 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5641 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5642 } else { /* this is the last level, so use just receiving processes in subcomm */ 5643 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5644 } 5645 } else { 5646 if (pcbddc->dbg_flag) { 5647 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5650 } 5651 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5652 coarse_mat_is = t_coarse_mat_is; 5653 } 5654 5655 /* create local to global scatters for coarse problem */ 5656 if (compute_vecs) { 5657 PetscInt lrows; 5658 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5659 if (coarse_mat_is) { 5660 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5661 } else { 5662 lrows = 0; 5663 } 5664 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5665 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5666 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5667 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5668 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5669 } 5670 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5671 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5672 5673 /* set defaults for coarse KSP and PC */ 5674 if (multilevel_allowed) { 5675 coarse_ksp_type = KSPRICHARDSON; 5676 coarse_pc_type = PCBDDC; 5677 } else { 5678 coarse_ksp_type = KSPPREONLY; 5679 coarse_pc_type = PCREDUNDANT; 5680 } 5681 5682 /* print some info if requested */ 5683 if (pcbddc->dbg_flag) { 5684 if (!multilevel_allowed) { 5685 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5686 if (multilevel_requested) { 5687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5688 } else if (pcbddc->max_levels) { 5689 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5690 } 5691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5692 } 5693 } 5694 5695 /* create the coarse KSP object only once with defaults */ 5696 if (coarse_mat_is) { 5697 MatReuse coarse_mat_reuse; 5698 PetscViewer dbg_viewer = NULL; 5699 if (pcbddc->dbg_flag) { 5700 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5701 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5702 } 5703 if (!pcbddc->coarse_ksp) { 5704 char prefix[256],str_level[16]; 5705 size_t len; 5706 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5707 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5708 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5709 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5710 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5711 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5712 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5713 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5714 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5715 /* prefix */ 5716 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5717 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5718 if (!pcbddc->current_level) { 5719 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5720 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5721 } else { 5722 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5723 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5724 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5725 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5726 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5727 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5728 } 5729 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5730 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5731 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5732 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5733 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5734 /* allow user customization */ 5735 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5736 } 5737 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5738 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5739 if (nisdofs) { 5740 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5741 for (i=0;i<nisdofs;i++) { 5742 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5743 } 5744 } 5745 if (nisneu) { 5746 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5747 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5748 } 5749 if (nisvert) { 5750 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5751 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5752 } 5753 5754 /* get some info after set from options */ 5755 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5756 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5757 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5758 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5759 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5760 isbddc = PETSC_FALSE; 5761 } 5762 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5763 if (isredundant) { 5764 KSP inner_ksp; 5765 PC inner_pc; 5766 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5767 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5768 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5769 } 5770 5771 /* assemble coarse matrix */ 5772 if (coarse_reuse) { 5773 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5774 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5775 coarse_mat_reuse = MAT_REUSE_MATRIX; 5776 } else { 5777 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5778 } 5779 if (isbddc || isnn) { 5780 if (isbddc) { /* currently there are no APIs for these options */ 5781 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5782 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5783 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5784 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5785 if (pcbddc_coarse->benign_compute_nonetflux) { 5786 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5787 } 5788 } 5789 if (pcbddc->coarsening_ratio > 1) { 5790 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5791 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5792 if (pcbddc->dbg_flag) { 5793 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5794 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5795 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5796 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5797 } 5798 } 5799 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5800 } else { 5801 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5802 coarse_mat = coarse_mat_is; 5803 } 5804 } else { 5805 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5806 } 5807 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5808 5809 /* propagate symmetry info of coarse matrix */ 5810 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5811 if (pc->pmat->symmetric_set) { 5812 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5813 } 5814 if (pc->pmat->hermitian_set) { 5815 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5816 } 5817 if (pc->pmat->spd_set) { 5818 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5819 } 5820 /* set operators */ 5821 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5822 if (pcbddc->dbg_flag) { 5823 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5824 } 5825 } else { /* processes non partecipating to coarse solver (if any) */ 5826 coarse_mat = 0; 5827 } 5828 ierr = PetscFree(isarray);CHKERRQ(ierr); 5829 #if 0 5830 { 5831 PetscViewer viewer; 5832 char filename[256]; 5833 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5834 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5835 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5836 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5837 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5838 } 5839 #endif 5840 5841 /* Compute coarse null space (special handling by BDDC only) */ 5842 #if 0 5843 if (pcbddc->NullSpace) { 5844 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5845 } 5846 #endif 5847 /* hack */ 5848 if (pcbddc->coarse_ksp) { 5849 Vec crhs,csol; 5850 5851 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5852 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5853 if (!csol) { 5854 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5855 } 5856 if (!crhs) { 5857 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5858 } 5859 } 5860 5861 /* compute null space for coarse solver if the benign trick has been requested */ 5862 if (pcbddc->benign_null) { 5863 5864 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5865 for (i=0;i<pcbddc->benign_n;i++) { 5866 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5867 } 5868 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5869 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5870 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5871 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5872 if (coarse_mat) { 5873 Vec nullv; 5874 PetscScalar *array,*array2; 5875 PetscInt nl; 5876 5877 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5878 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5879 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5880 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5881 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5882 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5883 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5884 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5885 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5886 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5887 } 5888 } 5889 5890 if (pcbddc->coarse_ksp) { 5891 PetscBool ispreonly; 5892 5893 if (CoarseNullSpace) { 5894 PetscBool isnull; 5895 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5896 if (0) { 5897 if (isbddc && !pcbddc->benign_saddle_point) { 5898 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5899 } else { 5900 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5901 } 5902 } else { 5903 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5904 } 5905 } 5906 /* setup coarse ksp */ 5907 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5908 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5909 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5910 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5911 KSP check_ksp; 5912 KSPType check_ksp_type; 5913 PC check_pc; 5914 Vec check_vec,coarse_vec; 5915 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5916 PetscInt its; 5917 PetscBool compute_eigs; 5918 PetscReal *eigs_r,*eigs_c; 5919 PetscInt neigs; 5920 const char *prefix; 5921 5922 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5923 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5924 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5925 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5926 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5927 if (ispreonly) { 5928 check_ksp_type = KSPPREONLY; 5929 compute_eigs = PETSC_FALSE; 5930 } else { 5931 check_ksp_type = KSPGMRES; 5932 compute_eigs = PETSC_TRUE; 5933 } 5934 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5935 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5936 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5937 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5938 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5939 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5940 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5941 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5942 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5943 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5944 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5945 /* create random vec */ 5946 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5947 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5948 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5949 if (CoarseNullSpace) { 5950 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5951 } 5952 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5953 /* solve coarse problem */ 5954 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5955 if (CoarseNullSpace) { 5956 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5957 } 5958 /* set eigenvalue estimation if preonly has not been requested */ 5959 if (compute_eigs) { 5960 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5961 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5962 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5963 lambda_max = eigs_r[neigs-1]; 5964 lambda_min = eigs_r[0]; 5965 if (pcbddc->use_coarse_estimates) { 5966 if (lambda_max>lambda_min) { 5967 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5968 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5969 } 5970 } 5971 } 5972 5973 /* check coarse problem residual error */ 5974 if (pcbddc->dbg_flag) { 5975 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5976 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5977 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5978 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5979 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5980 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5981 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5982 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5983 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5984 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5985 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5986 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5987 if (CoarseNullSpace) { 5988 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5989 } 5990 if (compute_eigs) { 5991 PetscReal lambda_max_s,lambda_min_s; 5992 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5993 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5994 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5995 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5996 for (i=0;i<neigs;i++) { 5997 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5998 } 5999 } 6000 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6001 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6002 } 6003 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6004 if (compute_eigs) { 6005 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6006 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6007 } 6008 } 6009 } 6010 /* print additional info */ 6011 if (pcbddc->dbg_flag) { 6012 /* waits until all processes reaches this point */ 6013 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6014 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6015 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6016 } 6017 6018 /* free memory */ 6019 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6020 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6021 PetscFunctionReturn(0); 6022 } 6023 6024 #undef __FUNCT__ 6025 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6026 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6027 { 6028 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6029 PC_IS* pcis = (PC_IS*)pc->data; 6030 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6031 IS subset,subset_mult,subset_n; 6032 PetscInt local_size,coarse_size=0; 6033 PetscInt *local_primal_indices=NULL; 6034 const PetscInt *t_local_primal_indices; 6035 PetscErrorCode ierr; 6036 6037 PetscFunctionBegin; 6038 /* Compute global number of coarse dofs */ 6039 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 6040 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6041 } 6042 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6043 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6044 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6045 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6046 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6047 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6048 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6049 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6050 if (local_size != pcbddc->local_primal_size) { 6051 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 6052 } 6053 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6054 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6055 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6056 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6057 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6058 6059 /* check numbering */ 6060 if (pcbddc->dbg_flag) { 6061 PetscScalar coarsesum,*array,*array2; 6062 PetscInt i; 6063 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6064 6065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6068 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6069 /* counter */ 6070 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6071 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6074 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6075 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6076 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6077 for (i=0;i<pcbddc->local_primal_size;i++) { 6078 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6079 } 6080 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6081 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6082 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6083 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6084 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6085 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6086 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6087 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6088 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6089 for (i=0;i<pcis->n;i++) { 6090 if (array[i] != 0.0 && array[i] != array2[i]) { 6091 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6092 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6093 set_error = PETSC_TRUE; 6094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6095 } 6096 } 6097 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6098 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6100 for (i=0;i<pcis->n;i++) { 6101 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6102 } 6103 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6104 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6105 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6106 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6107 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6109 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6110 PetscInt *gidxs; 6111 6112 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6113 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6116 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6117 for (i=0;i<pcbddc->local_primal_size;i++) { 6118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6119 } 6120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6121 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6122 } 6123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6124 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6125 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6126 } 6127 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6128 /* get back data */ 6129 *coarse_size_n = coarse_size; 6130 *local_primal_indices_n = local_primal_indices; 6131 PetscFunctionReturn(0); 6132 } 6133 6134 #undef __FUNCT__ 6135 #define __FUNCT__ "PCBDDCGlobalToLocal" 6136 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6137 { 6138 IS localis_t; 6139 PetscInt i,lsize,*idxs,n; 6140 PetscScalar *vals; 6141 PetscErrorCode ierr; 6142 6143 PetscFunctionBegin; 6144 /* get indices in local ordering exploiting local to global map */ 6145 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6146 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6147 for (i=0;i<lsize;i++) vals[i] = 1.0; 6148 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6149 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6150 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6151 if (idxs) { /* multilevel guard */ 6152 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6153 } 6154 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6155 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6156 ierr = PetscFree(vals);CHKERRQ(ierr); 6157 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6158 /* now compute set in local ordering */ 6159 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6160 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6161 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6162 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6163 for (i=0,lsize=0;i<n;i++) { 6164 if (PetscRealPart(vals[i]) > 0.5) { 6165 lsize++; 6166 } 6167 } 6168 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6169 for (i=0,lsize=0;i<n;i++) { 6170 if (PetscRealPart(vals[i]) > 0.5) { 6171 idxs[lsize++] = i; 6172 } 6173 } 6174 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6176 *localis = localis_t; 6177 PetscFunctionReturn(0); 6178 } 6179 6180 #undef __FUNCT__ 6181 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6182 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6183 { 6184 PC_IS *pcis=(PC_IS*)pc->data; 6185 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6186 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6187 Mat S_j; 6188 PetscInt *used_xadj,*used_adjncy; 6189 PetscBool free_used_adj; 6190 PetscErrorCode ierr; 6191 6192 PetscFunctionBegin; 6193 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6194 free_used_adj = PETSC_FALSE; 6195 if (pcbddc->sub_schurs_layers == -1) { 6196 used_xadj = NULL; 6197 used_adjncy = NULL; 6198 } else { 6199 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6200 used_xadj = pcbddc->mat_graph->xadj; 6201 used_adjncy = pcbddc->mat_graph->adjncy; 6202 } else if (pcbddc->computed_rowadj) { 6203 used_xadj = pcbddc->mat_graph->xadj; 6204 used_adjncy = pcbddc->mat_graph->adjncy; 6205 } else { 6206 PetscBool flg_row=PETSC_FALSE; 6207 const PetscInt *xadj,*adjncy; 6208 PetscInt nvtxs; 6209 6210 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6211 if (flg_row) { 6212 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6213 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6214 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6215 free_used_adj = PETSC_TRUE; 6216 } else { 6217 pcbddc->sub_schurs_layers = -1; 6218 used_xadj = NULL; 6219 used_adjncy = NULL; 6220 } 6221 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6222 } 6223 } 6224 6225 /* setup sub_schurs data */ 6226 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6227 if (!sub_schurs->schur_explicit) { 6228 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6229 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6230 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6231 } else { 6232 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6233 PetscBool isseqaij; 6234 PetscInt benign_n; 6235 Mat change = NULL; 6236 IS change_primal = NULL; 6237 6238 if (!pcbddc->use_vertices && reuse_solvers) { 6239 PetscInt n_vertices; 6240 6241 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6242 reuse_solvers = (PetscBool)!n_vertices; 6243 } 6244 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6245 if (!isseqaij) { 6246 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6247 if (matis->A == pcbddc->local_mat) { 6248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6249 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6250 } else { 6251 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6252 } 6253 } 6254 if (!pcbddc->benign_change_explicit) { 6255 benign_n = pcbddc->benign_n; 6256 } else { 6257 benign_n = 0; 6258 } 6259 /* If the user defines additional constraints, we import them here. 6260 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6261 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6262 PC_IS *pcisf; 6263 PC_BDDC *pcbddcf; 6264 PC pcf; 6265 6266 if (pcbddc->sub_schurs_rebuild) { 6267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6268 } 6269 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6270 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6271 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6272 /* hacks */ 6273 pcisf = (PC_IS*)pcf->data; 6274 pcisf->is_B_local = pcis->is_B_local; 6275 pcisf->vec1_N = pcis->vec1_N; 6276 pcisf->BtoNmap = pcis->BtoNmap; 6277 pcisf->n = pcis->n; 6278 pcisf->n_B = pcis->n_B; 6279 pcbddcf = (PC_BDDC*)pcf->data; 6280 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6281 pcbddcf->mat_graph = pcbddc->mat_graph; 6282 pcbddcf->use_faces = PETSC_TRUE; 6283 pcbddcf->use_change_of_basis = PETSC_TRUE; 6284 pcbddcf->use_change_on_faces = PETSC_TRUE; 6285 pcbddcf->use_qr_single = PETSC_TRUE; 6286 pcbddcf->fake_change = PETSC_TRUE; 6287 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6288 /* store information on primal vertices and change of basis (in local numbering) */ 6289 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6290 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6291 change = pcbddcf->ConstraintMatrix; 6292 pcbddcf->ConstraintMatrix = NULL; 6293 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6294 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6295 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6296 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6297 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6298 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6299 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6300 pcf->ops->destroy = NULL; 6301 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6302 } 6303 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6304 ierr = MatDestroy(&change);CHKERRQ(ierr); 6305 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6306 } 6307 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6308 6309 /* free adjacency */ 6310 if (free_used_adj) { 6311 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6312 } 6313 PetscFunctionReturn(0); 6314 } 6315 6316 #undef __FUNCT__ 6317 #define __FUNCT__ "PCBDDCInitSubSchurs" 6318 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6319 { 6320 PC_IS *pcis=(PC_IS*)pc->data; 6321 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6322 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6323 PCBDDCGraph graph; 6324 PetscErrorCode ierr; 6325 6326 PetscFunctionBegin; 6327 /* attach interface graph for determining subsets */ 6328 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6329 IS verticesIS,verticescomm; 6330 PetscInt vsize,*idxs; 6331 6332 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6333 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6334 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6335 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6336 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6337 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6338 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6339 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6340 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6341 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6342 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6343 /* 6344 if (pcbddc->dbg_flag) { 6345 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6346 } 6347 */ 6348 } else { 6349 graph = pcbddc->mat_graph; 6350 } 6351 6352 /* sub_schurs init */ 6353 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6354 6355 /* free graph struct */ 6356 if (pcbddc->sub_schurs_rebuild) { 6357 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6358 } 6359 PetscFunctionReturn(0); 6360 } 6361 6362 #undef __FUNCT__ 6363 #define __FUNCT__ "PCBDDCCheckOperator" 6364 PetscErrorCode PCBDDCCheckOperator(PC pc) 6365 { 6366 PC_IS *pcis=(PC_IS*)pc->data; 6367 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6368 PetscErrorCode ierr; 6369 6370 PetscFunctionBegin; 6371 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6372 IS zerodiag = NULL; 6373 Mat S_j,B0_B=NULL; 6374 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6375 PetscScalar *p0_check,*array,*array2; 6376 PetscReal norm; 6377 PetscInt i; 6378 6379 /* B0 and B0_B */ 6380 if (zerodiag) { 6381 IS dummy; 6382 6383 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6384 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6385 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6386 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6387 } 6388 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6389 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6390 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6391 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6392 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6393 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6394 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6395 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6396 /* S_j */ 6397 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6398 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6399 6400 /* mimic vector in \widetilde{W}_\Gamma */ 6401 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6402 /* continuous in primal space */ 6403 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6404 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6405 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6406 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6407 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6408 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6409 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6410 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6411 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6412 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6413 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6414 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6415 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6416 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6417 6418 /* assemble rhs for coarse problem */ 6419 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6420 /* local with Schur */ 6421 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6422 if (zerodiag) { 6423 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6424 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6425 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6426 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6427 } 6428 /* sum on primal nodes the local contributions */ 6429 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6430 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6431 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6432 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6433 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6434 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6435 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6436 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6437 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6438 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6439 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6440 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6441 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6442 /* scale primal nodes (BDDC sums contibutions) */ 6443 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6444 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6445 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6446 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6447 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6448 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6449 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6450 /* global: \widetilde{B0}_B w_\Gamma */ 6451 if (zerodiag) { 6452 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6453 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6454 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6455 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6456 } 6457 /* BDDC */ 6458 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6459 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6460 6461 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6462 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6463 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6464 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6465 for (i=0;i<pcbddc->benign_n;i++) { 6466 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6467 } 6468 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6469 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6470 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6471 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6472 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6473 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6474 } 6475 PetscFunctionReturn(0); 6476 } 6477