1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 recompute_zerodiag = PETSC_FALSE; 587 has_null_pressures = PETSC_FALSE; 588 have_null = PETSC_FALSE; 589 } 590 591 /* final check for null pressures */ 592 if (zerodiag && pressures) { 593 PetscInt nz,np; 594 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 595 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 596 if (nz != np) have_null = PETSC_FALSE; 597 } 598 599 if (recompute_zerodiag) { 600 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 601 if (pcbddc->benign_n == 1) { 602 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 603 zerodiag = zerodiag_subs[0]; 604 } else { 605 PetscInt i,nzn,*new_idxs; 606 607 nzn = 0; 608 for (i=0;i<pcbddc->benign_n;i++) { 609 PetscInt ns; 610 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 611 nzn += ns; 612 } 613 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns,*idxs; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 620 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 621 nzn += ns; 622 } 623 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 625 } 626 have_null = PETSC_FALSE; 627 } 628 629 /* no-net-flux */ 630 if (pcbddc->benign_compute_nonetflux) { 631 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 632 MatNullSpace near_null_space; 633 ISLocalToGlobalMapping rmap; 634 Vec quad_vec; 635 PetscScalar *pvals; 636 PetscInt i,np,*dummyins; 637 638 /* create vector to hold quadrature weights */ 639 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 640 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 641 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 642 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 643 644 /* compute B^{(i)T} * 1_p */ 645 np = 0; 646 if (zerodiag) { 647 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 648 } else if (pressures) { 649 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 650 } 651 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 652 for (i=0;i<np;i++) pvals[i] = 1.; 653 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 654 if (np) { 655 const PetscInt *pidxs; 656 657 if (zerodiag) { 658 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 659 } else if (pressures) { 660 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 661 } 662 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 663 if (zerodiag) { 664 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 } 669 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 670 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 672 ierr = PetscFree(pvals);CHKERRQ(ierr); 673 674 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 675 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 676 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 677 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 678 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 679 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 680 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 681 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 682 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 683 for (i=0;i<matis->sf_nleaves;i++) { 684 if (PetscGlobalRank != matis->sf_leafdata[i]) { 685 pvals[i] = 0.; 686 } 687 } 688 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 689 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 690 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 692 ierr = PetscFree(dummyins);CHKERRQ(ierr); 693 694 /* assembly quadrature vec and attach near null space to pmat */ 695 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 696 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 697 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 699 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 700 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 701 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 702 } 703 704 /* change of basis and p0 dofs */ 705 if (has_null_pressures) { 706 IS zerodiagc; 707 const PetscInt *idxs,*idxsc; 708 PetscInt i,s,*nnz; 709 710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 713 /* local change of basis for pressures */ 714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 720 for (i=0;i<pcbddc->benign_n;i++) { 721 PetscInt nzs,j; 722 723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 728 } 729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 730 ierr = PetscFree(nnz);CHKERRQ(ierr); 731 /* set identity on velocities */ 732 for (i=0;i<n-nz;i++) { 733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 734 } 735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 739 /* set change on pressures */ 740 for (s=0;s<pcbddc->benign_n;s++) { 741 PetscScalar *array; 742 PetscInt nzs; 743 744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 746 for (i=0;i<nzs-1;i++) { 747 PetscScalar vals[2]; 748 PetscInt cols[2]; 749 750 cols[0] = idxs[i]; 751 cols[1] = idxs[nzs-1]; 752 vals[0] = 1.; 753 vals[1] = 1.; 754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 } 756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 757 for (i=0;i<nzs-1;i++) array[i] = -1.; 758 array[nzs-1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 760 /* store local idxs for p0 */ 761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 763 ierr = PetscFree(array);CHKERRQ(ierr); 764 } 765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* project if needed */ 768 if (pcbddc->benign_change_explicit) { 769 Mat M; 770 771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 774 ierr = MatDestroy(&M);CHKERRQ(ierr); 775 } 776 /* store global idxs for p0 */ 777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 778 } 779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 781 782 /* determines if the coarse solver will be singular or not */ 783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 784 *zerodiaglocal = zerodiag; 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 791 { 792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (!pcbddc->benign_sf) { 797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 798 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 799 } 800 if (get) { /* use SF to get values */ 801 PetscScalar *array; 802 803 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 804 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 805 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 807 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 808 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 817 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 818 { 819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 /* TODO: add error checking 824 - avoid nested pop (or push) calls. 825 - cannot push before pop. 826 - cannot call this if pcbddc->local_mat is NULL 827 */ 828 if (!pcbddc->benign_n) { 829 PetscFunctionReturn(0); 830 } 831 if (pop) { 832 if (pcbddc->benign_change_explicit) { 833 IS is_p0; 834 MatReuse reuse; 835 836 /* extract B_0 */ 837 reuse = MAT_INITIAL_MATRIX; 838 if (pcbddc->benign_B0) { 839 reuse = MAT_REUSE_MATRIX; 840 } 841 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 843 /* remove rows and cols from local problem */ 844 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 845 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 846 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 848 } else { 849 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 850 PetscScalar *vals; 851 PetscInt i,n,*idxs_ins; 852 853 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 854 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 855 if (!pcbddc->benign_B0) { 856 PetscInt *nnz; 857 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 858 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 859 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 860 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 861 for (i=0;i<pcbddc->benign_n;i++) { 862 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 863 nnz[i] = n - nnz[i]; 864 } 865 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 866 ierr = PetscFree(nnz);CHKERRQ(ierr); 867 } 868 869 for (i=0;i<pcbddc->benign_n;i++) { 870 PetscScalar *array; 871 PetscInt *idxs,j,nz,cum; 872 873 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 875 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 876 for (j=0;j<nz;j++) vals[j] = 1.; 877 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 878 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 879 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 880 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 881 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 883 cum = 0; 884 for (j=0;j<n;j++) { 885 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 886 vals[cum] = array[j]; 887 idxs_ins[cum] = j; 888 cum++; 889 } 890 } 891 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 892 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 898 } 899 } else { /* push */ 900 if (pcbddc->benign_change_explicit) { 901 PetscInt i; 902 903 for (i=0;i<pcbddc->benign_n;i++) { 904 PetscScalar *B0_vals; 905 PetscInt *B0_cols,B0_ncol; 906 907 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 908 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 909 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 911 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 } else { 916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "PCBDDCAdaptiveSelection" 924 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 925 { 926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 927 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 928 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 929 PetscBLASInt *B_iwork,*B_ifail; 930 PetscScalar *work,lwork; 931 PetscScalar *St,*S,*eigv; 932 PetscScalar *Sarray,*Starray; 933 PetscReal *eigs,thresh; 934 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 935 PetscBool allocated_S_St; 936 #if defined(PETSC_USE_COMPLEX) 937 PetscReal *rwork; 938 #endif 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 if (!sub_schurs->schur_explicit) { 943 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 944 } 945 946 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 947 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 948 } 949 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 954 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 955 } 956 957 if (pcbddc->dbg_flag) { 958 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 959 } 960 961 /* max size of subsets */ 962 mss = 0; 963 for (i=0;i<sub_schurs->n_subs;i++) { 964 PetscInt subset_size; 965 966 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 967 mss = PetscMax(mss,subset_size); 968 } 969 970 /* min/max and threshold */ 971 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 972 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 973 nmax = PetscMax(nmin,nmax); 974 allocated_S_St = PETSC_FALSE; 975 if (nmin) { 976 allocated_S_St = PETSC_TRUE; 977 } 978 979 /* allocate lapack workspace */ 980 cum = cum2 = 0; 981 maxneigs = 0; 982 for (i=0;i<sub_schurs->n_subs;i++) { 983 PetscInt n,subset_size; 984 985 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 986 n = PetscMin(subset_size,nmax); 987 cum += subset_size; 988 cum2 += subset_size*n; 989 maxneigs = PetscMax(maxneigs,n); 990 } 991 if (mss) { 992 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 993 PetscBLASInt B_itype = 1; 994 PetscBLASInt B_N = mss; 995 PetscReal zero = 0.0; 996 PetscReal eps = 0.0; /* dlamch? */ 997 998 B_lwork = -1; 999 S = NULL; 1000 St = NULL; 1001 eigs = NULL; 1002 eigv = NULL; 1003 B_iwork = NULL; 1004 B_ifail = NULL; 1005 #if defined(PETSC_USE_COMPLEX) 1006 rwork = NULL; 1007 #endif 1008 thresh = 1.0; 1009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1010 #if defined(PETSC_USE_COMPLEX) 1011 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1012 #else 1013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1014 #endif 1015 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1016 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1017 } else { 1018 /* TODO */ 1019 } 1020 } else { 1021 lwork = 0; 1022 } 1023 1024 nv = 0; 1025 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1026 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1027 } 1028 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1029 if (allocated_S_St) { 1030 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1031 } 1032 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_COMPLEX) 1034 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1035 #endif 1036 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1037 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1038 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1039 nv+cum,&pcbddc->adaptive_constraints_idxs, 1040 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1041 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1042 1043 maxneigs = 0; 1044 cum = cumarray = 0; 1045 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1046 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1047 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1048 const PetscInt *idxs; 1049 1050 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1051 for (cum=0;cum<nv;cum++) { 1052 pcbddc->adaptive_constraints_n[cum] = 1; 1053 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1054 pcbddc->adaptive_constraints_data[cum] = 1.0; 1055 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1056 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1057 } 1058 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1059 } 1060 1061 if (mss) { /* multilevel */ 1062 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1063 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1064 } 1065 1066 thresh = pcbddc->adaptive_threshold; 1067 for (i=0;i<sub_schurs->n_subs;i++) { 1068 const PetscInt *idxs; 1069 PetscReal infty = PETSC_MAX_REAL; 1070 PetscInt j,subset_size,eigs_start = 0; 1071 PetscBLASInt B_N; 1072 PetscBool same_data = PETSC_FALSE; 1073 1074 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1075 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1076 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1077 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1078 if (sub_schurs->is_hermitian) { 1079 PetscInt j,k; 1080 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1081 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1082 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } 1084 for (j=0;j<subset_size;j++) { 1085 for (k=j;k<subset_size;k++) { 1086 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1087 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1088 } 1089 } 1090 } else { 1091 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1092 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1093 } 1094 } else { 1095 S = Sarray + cumarray; 1096 St = Starray + cumarray; 1097 } 1098 /* see if we can save some work */ 1099 if (sub_schurs->n_subs == 1) { 1100 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1101 } 1102 1103 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1104 B_neigs = 0; 1105 } else { 1106 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1107 PetscBLASInt B_itype = 1; 1108 PetscBLASInt B_IL, B_IU; 1109 PetscReal eps = -1.0; /* dlamch? */ 1110 PetscInt nmin_s; 1111 1112 if (pcbddc->dbg_flag) { 1113 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1114 } 1115 1116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1117 if (thresh > 1.+PETSC_SMALL) { 1118 1119 /* ask for eigenvalues larger than thresh */ 1120 #if defined(PETSC_USE_COMPLEX) 1121 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1122 #else 1123 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1124 #endif 1125 } else { 1126 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1127 B_IL = 1; 1128 #if defined(PETSC_USE_COMPLEX) 1129 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1130 #else 1131 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1132 #endif 1133 } 1134 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1135 if (B_ierr) { 1136 if (B_ierr < 0 ) { 1137 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1138 } else if (B_ierr <= B_N) { 1139 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1140 } else { 1141 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1142 } 1143 } 1144 1145 if (B_neigs > nmax) { 1146 if (pcbddc->dbg_flag) { 1147 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1148 } 1149 eigs_start = B_neigs -nmax; 1150 B_neigs = nmax; 1151 } 1152 1153 nmin_s = PetscMin(nmin,B_N); 1154 if (B_neigs < nmin_s) { 1155 PetscBLASInt B_neigs2; 1156 1157 B_IU = B_N - B_neigs; 1158 B_IL = B_N - nmin_s + 1; 1159 if (pcbddc->dbg_flag) { 1160 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1161 } 1162 if (sub_schurs->is_hermitian) { 1163 PetscInt j; 1164 for (j=0;j<subset_size;j++) { 1165 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1166 } 1167 for (j=0;j<subset_size;j++) { 1168 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1169 } 1170 } else { 1171 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1172 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1173 } 1174 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1175 #if defined(PETSC_USE_COMPLEX) 1176 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1177 #else 1178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1179 #endif 1180 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1181 B_neigs += B_neigs2; 1182 } 1183 if (B_ierr) { 1184 if (B_ierr < 0 ) { 1185 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1186 } else if (B_ierr <= B_N) { 1187 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1188 } else { 1189 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1190 } 1191 } 1192 if (pcbddc->dbg_flag) { 1193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1194 for (j=0;j<B_neigs;j++) { 1195 if (eigs[j] == 0.0) { 1196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1197 } else { 1198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1199 } 1200 } 1201 } 1202 } else { 1203 /* TODO */ 1204 } 1205 } 1206 /* change the basis back to the original one */ 1207 if (sub_schurs->change) { 1208 Mat change,phi,phit; 1209 1210 if (pcbddc->dbg_flag > 1) { 1211 PetscInt ii; 1212 for (ii=0;ii<B_neigs;ii++) { 1213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1214 for (j=0;j<B_N;j++) { 1215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1216 } 1217 } 1218 } 1219 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1220 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1221 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1222 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1223 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1224 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1225 } 1226 maxneigs = PetscMax(B_neigs,maxneigs); 1227 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1228 if (B_neigs) { 1229 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1230 1231 if (pcbddc->dbg_flag > 1) { 1232 PetscInt ii; 1233 for (ii=0;ii<B_neigs;ii++) { 1234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1235 for (j=0;j<B_N;j++) { 1236 #if defined(PETSC_USE_COMPLEX) 1237 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1238 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1240 #else 1241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1242 #endif 1243 } 1244 } 1245 } 1246 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1247 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1248 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1249 cum++; 1250 } 1251 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1252 /* shift for next computation */ 1253 cumarray += subset_size*subset_size; 1254 } 1255 if (pcbddc->dbg_flag) { 1256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 } 1258 1259 if (mss) { 1260 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1261 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1262 /* destroy matrices (junk) */ 1263 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1264 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1265 } 1266 if (allocated_S_St) { 1267 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1268 } 1269 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1270 #if defined(PETSC_USE_COMPLEX) 1271 ierr = PetscFree(rwork);CHKERRQ(ierr); 1272 #endif 1273 if (pcbddc->dbg_flag) { 1274 PetscInt maxneigs_r; 1275 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "PCBDDCSetUpSolvers" 1283 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1284 { 1285 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1286 PetscScalar *coarse_submat_vals; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1291 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1292 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1293 1294 /* Setup local neumann solver ksp_R */ 1295 /* PCBDDCSetUpLocalScatters should be called first! */ 1296 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1297 1298 /* Change global null space passed in by the user if change of basis has been requested */ 1299 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1300 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1301 } 1302 1303 /* 1304 Setup local correction and local part of coarse basis. 1305 Gives back the dense local part of the coarse matrix in column major ordering 1306 */ 1307 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1308 1309 /* Compute total number of coarse nodes and setup coarse solver */ 1310 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1311 1312 /* free */ 1313 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "PCBDDCResetCustomization" 1319 PetscErrorCode PCBDDCResetCustomization(PC pc) 1320 { 1321 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1326 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1327 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1328 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1329 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1330 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1331 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1332 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1333 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1334 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1335 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1336 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "PCBDDCResetTopography" 1342 PetscErrorCode PCBDDCResetTopography(PC pc) 1343 { 1344 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1345 PetscInt i; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1350 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1351 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1352 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1353 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1354 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1355 for (i=0;i<pcbddc->n_local_subs;i++) { 1356 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1357 } 1358 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1359 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "PCBDDCResetSolvers" 1365 PetscErrorCode PCBDDCResetSolvers(PC pc) 1366 { 1367 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1372 if (pcbddc->coarse_phi_B) { 1373 PetscScalar *array; 1374 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1375 ierr = PetscFree(array);CHKERRQ(ierr); 1376 } 1377 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1378 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1380 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1381 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1382 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1383 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1384 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1385 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1386 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1387 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1388 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1389 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1390 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1391 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1392 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1393 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1394 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1395 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1396 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1397 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1398 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1399 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1400 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1401 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1402 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1403 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1404 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1405 if (pcbddc->benign_zerodiag_subs) { 1406 PetscInt i; 1407 for (i=0;i<pcbddc->benign_n;i++) { 1408 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1409 } 1410 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1411 } 1412 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1418 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1419 { 1420 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1421 PC_IS *pcis = (PC_IS*)pc->data; 1422 VecType impVecType; 1423 PetscInt n_constraints,n_R,old_size; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 if (!pcbddc->ConstraintMatrix) { 1428 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1429 } 1430 /* get sizes */ 1431 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1432 n_R = pcis->n - pcbddc->n_vertices; 1433 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1434 /* local work vectors (try to avoid unneeded work)*/ 1435 /* R nodes */ 1436 old_size = -1; 1437 if (pcbddc->vec1_R) { 1438 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1439 } 1440 if (n_R != old_size) { 1441 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1442 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1443 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1444 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1445 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1446 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1447 } 1448 /* local primal dofs */ 1449 old_size = -1; 1450 if (pcbddc->vec1_P) { 1451 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1452 } 1453 if (pcbddc->local_primal_size != old_size) { 1454 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1456 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1457 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1458 } 1459 /* local explicit constraints */ 1460 old_size = -1; 1461 if (pcbddc->vec1_C) { 1462 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1463 } 1464 if (n_constraints && n_constraints != old_size) { 1465 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1467 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1468 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "PCBDDCSetUpCorrection" 1475 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1476 { 1477 PetscErrorCode ierr; 1478 /* pointers to pcis and pcbddc */ 1479 PC_IS* pcis = (PC_IS*)pc->data; 1480 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1481 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1482 /* submatrices of local problem */ 1483 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1484 /* submatrices of local coarse problem */ 1485 Mat S_VV,S_CV,S_VC,S_CC; 1486 /* working matrices */ 1487 Mat C_CR; 1488 /* additional working stuff */ 1489 PC pc_R; 1490 Mat F; 1491 Vec dummy_vec; 1492 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1493 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1494 PetscScalar *work; 1495 PetscInt *idx_V_B; 1496 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1497 PetscInt i,n_R,n_D,n_B; 1498 1499 /* some shortcuts to scalars */ 1500 PetscScalar one=1.0,m_one=-1.0; 1501 1502 PetscFunctionBegin; 1503 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1504 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1505 } 1506 1507 /* Set Non-overlapping dimensions */ 1508 n_vertices = pcbddc->n_vertices; 1509 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1510 n_B = pcis->n_B; 1511 n_D = pcis->n - n_B; 1512 n_R = pcis->n - n_vertices; 1513 1514 /* vertices in boundary numbering */ 1515 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1516 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1517 if (i != n_vertices) { 1518 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1519 } 1520 1521 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1522 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1523 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1524 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1526 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1527 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1528 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1530 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1531 1532 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1533 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1535 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1536 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1537 lda_rhs = n_R; 1538 need_benign_correction = PETSC_FALSE; 1539 if (isLU || isILU || isCHOL) { 1540 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1541 } else if (sub_schurs->reuse_solver) { 1542 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1543 MatFactorType type; 1544 1545 F = reuse_solver->F; 1546 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1547 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1548 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1549 need_benign_correction = !!reuse_solver->benign_n; 1550 } else { 1551 F = NULL; 1552 } 1553 1554 /* allocate workspace */ 1555 n = 0; 1556 if (n_constraints) { 1557 n += lda_rhs*n_constraints; 1558 } 1559 if (n_vertices) { 1560 n = PetscMax(2*lda_rhs*n_vertices,n); 1561 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1562 } 1563 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1564 1565 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1566 dummy_vec = NULL; 1567 if (need_benign_correction && lda_rhs != n_R && F) { 1568 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1569 } 1570 1571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1572 if (n_constraints) { 1573 Mat M1,M2,M3,C_B; 1574 IS is_aux; 1575 PetscScalar *array,*array2; 1576 1577 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1578 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1579 1580 /* Extract constraints on R nodes: C_{CR} */ 1581 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1582 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1583 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1584 1585 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1586 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1587 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1588 for (i=0;i<n_constraints;i++) { 1589 const PetscScalar *row_cmat_values; 1590 const PetscInt *row_cmat_indices; 1591 PetscInt size_of_constraint,j; 1592 1593 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1594 for (j=0;j<size_of_constraint;j++) { 1595 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1596 } 1597 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1598 } 1599 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1600 if (F) { 1601 Mat B; 1602 1603 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1604 if (need_benign_correction) { 1605 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1606 1607 /* rhs is already zero on interior dofs, no need to change the rhs */ 1608 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1609 } 1610 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1611 if (need_benign_correction) { 1612 PetscScalar *marr; 1613 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1614 1615 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1616 if (lda_rhs != n_R) { 1617 for (i=0;i<n_constraints;i++) { 1618 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1619 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1620 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1621 } 1622 } else { 1623 for (i=0;i<n_constraints;i++) { 1624 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1625 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1626 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1627 } 1628 } 1629 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1630 } 1631 ierr = MatDestroy(&B);CHKERRQ(ierr); 1632 } else { 1633 PetscScalar *marr; 1634 1635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1636 for (i=0;i<n_constraints;i++) { 1637 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1638 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1639 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1640 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1641 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1642 } 1643 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1644 } 1645 if (!pcbddc->switch_static) { 1646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1648 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1649 for (i=0;i<n_constraints;i++) { 1650 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1651 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1656 } 1657 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1658 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1659 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1660 } else { 1661 if (lda_rhs != n_R) { 1662 IS dummy; 1663 1664 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1665 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1666 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1667 } else { 1668 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1669 pcbddc->local_auxmat2 = local_auxmat2_R; 1670 } 1671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1672 } 1673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1678 if (isCHOL) { 1679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1680 } else { 1681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1682 } 1683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1689 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1690 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1693 } 1694 1695 /* Get submatrices from subdomain matrix */ 1696 if (n_vertices) { 1697 IS is_aux; 1698 1699 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1700 IS tis; 1701 1702 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1703 ierr = ISSort(tis);CHKERRQ(ierr); 1704 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1705 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1706 } else { 1707 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1708 } 1709 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1711 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1712 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1713 } 1714 1715 /* Matrix of coarse basis functions (local) */ 1716 if (pcbddc->coarse_phi_B) { 1717 PetscInt on_B,on_primal,on_D=n_D; 1718 if (pcbddc->coarse_phi_D) { 1719 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1720 } 1721 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1722 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1723 PetscScalar *marray; 1724 1725 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1726 ierr = PetscFree(marray);CHKERRQ(ierr); 1727 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1729 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1731 } 1732 } 1733 1734 if (!pcbddc->coarse_phi_B) { 1735 PetscScalar *marray; 1736 1737 n = n_B*pcbddc->local_primal_size; 1738 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1739 n += n_D*pcbddc->local_primal_size; 1740 } 1741 if (!pcbddc->symmetric_primal) { 1742 n *= 2; 1743 } 1744 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1746 n = n_B*pcbddc->local_primal_size; 1747 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1749 n += n_D*pcbddc->local_primal_size; 1750 } 1751 if (!pcbddc->symmetric_primal) { 1752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1754 n = n_B*pcbddc->local_primal_size; 1755 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1756 } 1757 } else { 1758 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1759 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1760 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1761 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1762 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1763 } 1764 } 1765 } 1766 1767 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1768 p0_lidx_I = NULL; 1769 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1770 const PetscInt *idxs; 1771 1772 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1773 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1774 for (i=0;i<pcbddc->benign_n;i++) { 1775 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1776 } 1777 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1778 } 1779 1780 /* vertices */ 1781 if (n_vertices) { 1782 1783 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1784 1785 if (n_R) { 1786 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1787 PetscBLASInt B_N,B_one = 1; 1788 PetscScalar *x,*y; 1789 PetscBool isseqaij; 1790 1791 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1792 if (need_benign_correction) { 1793 ISLocalToGlobalMapping RtoN; 1794 IS is_p0; 1795 PetscInt *idxs_p0,n; 1796 1797 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1798 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1799 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1800 if (n != pcbddc->benign_n) { 1801 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1802 } 1803 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1804 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1805 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1806 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1807 } 1808 1809 if (lda_rhs == n_R) { 1810 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1811 } else { 1812 PetscScalar *av,*array; 1813 const PetscInt *xadj,*adjncy; 1814 PetscInt n; 1815 PetscBool flg_row; 1816 1817 array = work+lda_rhs*n_vertices; 1818 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1819 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1820 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1821 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1822 for (i=0;i<n;i++) { 1823 PetscInt j; 1824 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1825 } 1826 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1827 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1829 } 1830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1831 if (need_benign_correction) { 1832 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1833 PetscScalar *marr; 1834 1835 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1836 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1837 1838 | 0 0 0 | (V) 1839 L = | 0 0 -1 | (P-p0) 1840 | 0 0 -1 | (p0) 1841 1842 */ 1843 for (i=0;i<reuse_solver->benign_n;i++) { 1844 const PetscScalar *vals; 1845 const PetscInt *idxs,*idxs_zero; 1846 PetscInt n,j,nz; 1847 1848 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1849 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1850 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1851 for (j=0;j<n;j++) { 1852 PetscScalar val = vals[j]; 1853 PetscInt k,col = idxs[j]; 1854 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1855 } 1856 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1857 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1858 } 1859 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1860 } 1861 if (F) { 1862 /* need to correct the rhs */ 1863 if (need_benign_correction) { 1864 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1865 PetscScalar *marr; 1866 1867 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1868 if (lda_rhs != n_R) { 1869 for (i=0;i<n_vertices;i++) { 1870 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1871 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1872 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1873 } 1874 } else { 1875 for (i=0;i<n_vertices;i++) { 1876 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1877 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1878 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1879 } 1880 } 1881 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1882 } 1883 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1884 /* need to correct the solution */ 1885 if (need_benign_correction) { 1886 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1887 PetscScalar *marr; 1888 1889 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1890 if (lda_rhs != n_R) { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1895 } 1896 } else { 1897 for (i=0;i<n_vertices;i++) { 1898 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1899 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1901 } 1902 } 1903 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1904 } 1905 } else { 1906 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1907 for (i=0;i<n_vertices;i++) { 1908 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1909 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1910 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1911 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1912 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1913 } 1914 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1915 } 1916 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1917 /* S_VV and S_CV */ 1918 if (n_constraints) { 1919 Mat B; 1920 1921 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1922 for (i=0;i<n_vertices;i++) { 1923 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1924 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1925 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1926 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1927 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1928 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1929 } 1930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1931 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1932 ierr = MatDestroy(&B);CHKERRQ(ierr); 1933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1934 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1935 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1936 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1937 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1938 ierr = MatDestroy(&B);CHKERRQ(ierr); 1939 } 1940 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1941 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1942 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1943 } 1944 if (lda_rhs != n_R) { 1945 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1947 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1948 } 1949 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1950 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1951 if (need_benign_correction) { 1952 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1953 PetscScalar *marr,*sums; 1954 1955 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1956 ierr = MatDenseGetArray(S_VVt,&marr); 1957 for (i=0;i<reuse_solver->benign_n;i++) { 1958 const PetscScalar *vals; 1959 const PetscInt *idxs,*idxs_zero; 1960 PetscInt n,j,nz; 1961 1962 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1963 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1964 for (j=0;j<n_vertices;j++) { 1965 PetscInt k; 1966 sums[j] = 0.; 1967 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1968 } 1969 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1970 for (j=0;j<n;j++) { 1971 PetscScalar val = vals[j]; 1972 PetscInt k; 1973 for (k=0;k<n_vertices;k++) { 1974 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1975 } 1976 } 1977 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1978 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1979 } 1980 ierr = PetscFree(sums);CHKERRQ(ierr); 1981 ierr = MatDenseRestoreArray(S_VVt,&marr); 1982 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1983 } 1984 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1985 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1986 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1987 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1988 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1989 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1990 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1991 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1992 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1995 } 1996 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1997 1998 /* coarse basis functions */ 1999 for (i=0;i<n_vertices;i++) { 2000 PetscScalar *y; 2001 2002 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2003 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2004 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2005 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2006 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2007 y[n_B*i+idx_V_B[i]] = 1.0; 2008 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2009 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2010 2011 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2012 PetscInt j; 2013 2014 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2015 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2016 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2019 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2020 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2021 } 2022 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2023 } 2024 /* if n_R == 0 the object is not destroyed */ 2025 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2026 } 2027 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2028 2029 if (n_constraints) { 2030 Mat B; 2031 2032 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2033 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2034 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2035 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2036 if (n_vertices) { 2037 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2038 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2039 } else { 2040 Mat S_VCt; 2041 2042 if (lda_rhs != n_R) { 2043 ierr = MatDestroy(&B);CHKERRQ(ierr); 2044 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2045 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2046 } 2047 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2048 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2049 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2050 } 2051 } 2052 ierr = MatDestroy(&B);CHKERRQ(ierr); 2053 /* coarse basis functions */ 2054 for (i=0;i<n_constraints;i++) { 2055 PetscScalar *y; 2056 2057 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2058 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2059 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2063 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2064 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2065 PetscInt j; 2066 2067 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2068 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2069 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2072 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2073 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2074 } 2075 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2076 } 2077 } 2078 if (n_constraints) { 2079 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2080 } 2081 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2082 2083 /* coarse matrix entries relative to B_0 */ 2084 if (pcbddc->benign_n) { 2085 Mat B0_B,B0_BPHI; 2086 IS is_dummy; 2087 PetscScalar *data; 2088 PetscInt j; 2089 2090 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2091 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2092 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2093 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2094 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2095 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2096 for (j=0;j<pcbddc->benign_n;j++) { 2097 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2098 for (i=0;i<pcbddc->local_primal_size;i++) { 2099 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2100 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2101 } 2102 } 2103 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2104 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2105 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2106 } 2107 2108 /* compute other basis functions for non-symmetric problems */ 2109 if (!pcbddc->symmetric_primal) { 2110 Mat B_V=NULL,B_C=NULL; 2111 PetscScalar *marray; 2112 2113 if (n_constraints) { 2114 Mat S_CCT,C_CRT; 2115 2116 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 2117 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2118 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2119 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2120 if (n_vertices) { 2121 Mat S_VCT; 2122 2123 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2124 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2125 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2126 } 2127 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2128 } 2129 if (n_vertices && n_R) { 2130 PetscScalar *av,*marray; 2131 const PetscInt *xadj,*adjncy; 2132 PetscInt n; 2133 PetscBool flg_row; 2134 2135 /* B_V = B_V - A_VR^T */ 2136 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 2137 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2138 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2139 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2140 for (i=0;i<n;i++) { 2141 PetscInt j; 2142 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2143 } 2144 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2145 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2146 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2147 } 2148 2149 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2150 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2151 for (i=0;i<n_vertices;i++) { 2152 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2153 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2154 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2155 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2156 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2157 } 2158 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2159 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2160 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2161 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2162 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2163 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2164 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2165 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2166 } 2167 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2168 /* coarse basis functions */ 2169 for (i=0;i<pcbddc->local_primal_size;i++) { 2170 PetscScalar *y; 2171 2172 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2173 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2174 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2175 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2176 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2177 if (i<n_vertices) { 2178 y[n_B*i+idx_V_B[i]] = 1.0; 2179 } 2180 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2181 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2182 2183 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2184 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2185 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2186 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2187 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2188 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2189 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2190 } 2191 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2192 } 2193 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2194 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2195 } 2196 /* free memory */ 2197 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2198 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2199 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2200 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2201 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2202 ierr = PetscFree(work);CHKERRQ(ierr); 2203 if (n_vertices) { 2204 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2205 } 2206 if (n_constraints) { 2207 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2208 } 2209 /* Checking coarse_sub_mat and coarse basis functios */ 2210 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2211 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2212 if (pcbddc->dbg_flag) { 2213 Mat coarse_sub_mat; 2214 Mat AUXMAT,TM1,TM2,TM3,TM4; 2215 Mat coarse_phi_D,coarse_phi_B; 2216 Mat coarse_psi_D,coarse_psi_B; 2217 Mat A_II,A_BB,A_IB,A_BI; 2218 Mat C_B,CPHI; 2219 IS is_dummy; 2220 Vec mones; 2221 MatType checkmattype=MATSEQAIJ; 2222 PetscReal real_value; 2223 2224 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2225 Mat A; 2226 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2227 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2228 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2229 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2230 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2231 ierr = MatDestroy(&A);CHKERRQ(ierr); 2232 } else { 2233 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2234 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2235 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2236 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2237 } 2238 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2239 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2240 if (!pcbddc->symmetric_primal) { 2241 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2242 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2243 } 2244 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2245 2246 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2249 if (!pcbddc->symmetric_primal) { 2250 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2251 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2252 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2253 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2254 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2255 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2256 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2257 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2258 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2259 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2260 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2261 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2262 } else { 2263 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2264 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2265 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2266 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2267 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2268 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2269 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2270 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2271 } 2272 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2273 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2274 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2275 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 2276 if (pcbddc->benign_n) { 2277 Mat B0_B,B0_BPHI; 2278 PetscScalar *data,*data2; 2279 PetscInt j; 2280 2281 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2282 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2283 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2284 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2285 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2286 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2287 for (j=0;j<pcbddc->benign_n;j++) { 2288 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2289 for (i=0;i<pcbddc->local_primal_size;i++) { 2290 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2291 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2292 } 2293 } 2294 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2295 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2296 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2297 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2298 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2299 } 2300 #if 0 2301 { 2302 PetscViewer viewer; 2303 char filename[256]; 2304 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2305 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2306 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2307 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2308 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2309 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2310 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2311 if (save_change) { 2312 Mat phi_B; 2313 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2314 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2315 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2316 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2317 } else { 2318 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2319 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2320 } 2321 if (pcbddc->coarse_phi_D) { 2322 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2323 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2324 } 2325 if (pcbddc->coarse_psi_B) { 2326 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2327 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2328 } 2329 if (pcbddc->coarse_psi_D) { 2330 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2331 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2332 } 2333 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2334 } 2335 #endif 2336 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2337 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2338 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2340 2341 /* check constraints */ 2342 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2343 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2344 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2345 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2346 } else { 2347 PetscScalar *data; 2348 Mat tmat; 2349 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2351 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2352 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2353 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2354 } 2355 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2356 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2357 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2358 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2360 if (!pcbddc->symmetric_primal) { 2361 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2362 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2363 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2364 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2366 } 2367 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2368 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2369 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2370 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2372 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2373 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2374 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2375 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2376 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2377 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2378 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2379 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2380 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2381 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2382 if (!pcbddc->symmetric_primal) { 2383 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2384 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2385 } 2386 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2387 } 2388 /* get back data */ 2389 *coarse_submat_vals_n = coarse_submat_vals; 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2395 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2396 { 2397 Mat *work_mat; 2398 IS isrow_s,iscol_s; 2399 PetscBool rsorted,csorted; 2400 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2405 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2406 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2407 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2408 2409 if (!rsorted) { 2410 const PetscInt *idxs; 2411 PetscInt *idxs_sorted,i; 2412 2413 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2414 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2415 for (i=0;i<rsize;i++) { 2416 idxs_perm_r[i] = i; 2417 } 2418 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2419 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2420 for (i=0;i<rsize;i++) { 2421 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2422 } 2423 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2424 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2425 } else { 2426 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2427 isrow_s = isrow; 2428 } 2429 2430 if (!csorted) { 2431 if (isrow == iscol) { 2432 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2433 iscol_s = isrow_s; 2434 } else { 2435 const PetscInt *idxs; 2436 PetscInt *idxs_sorted,i; 2437 2438 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2439 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2440 for (i=0;i<csize;i++) { 2441 idxs_perm_c[i] = i; 2442 } 2443 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2444 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2445 for (i=0;i<csize;i++) { 2446 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2447 } 2448 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2449 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2450 } 2451 } else { 2452 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2453 iscol_s = iscol; 2454 } 2455 2456 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2457 2458 if (!rsorted || !csorted) { 2459 Mat new_mat; 2460 IS is_perm_r,is_perm_c; 2461 2462 if (!rsorted) { 2463 PetscInt *idxs_r,i; 2464 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2465 for (i=0;i<rsize;i++) { 2466 idxs_r[idxs_perm_r[i]] = i; 2467 } 2468 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2469 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2470 } else { 2471 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2472 } 2473 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2474 2475 if (!csorted) { 2476 if (isrow_s == iscol_s) { 2477 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2478 is_perm_c = is_perm_r; 2479 } else { 2480 PetscInt *idxs_c,i; 2481 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2482 for (i=0;i<csize;i++) { 2483 idxs_c[idxs_perm_c[i]] = i; 2484 } 2485 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2486 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2487 } 2488 } else { 2489 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2490 } 2491 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2492 2493 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2494 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2495 work_mat[0] = new_mat; 2496 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2497 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2498 } 2499 2500 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2501 *B = work_mat[0]; 2502 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2503 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2504 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2510 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2511 { 2512 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2513 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2514 Mat new_mat; 2515 IS is_local,is_global; 2516 PetscInt local_size; 2517 PetscBool isseqaij; 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2522 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2523 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2524 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2525 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2526 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2527 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2528 2529 /* check */ 2530 if (pcbddc->dbg_flag) { 2531 Vec x,x_change; 2532 PetscReal error; 2533 2534 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2535 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2536 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2537 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2538 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2540 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2541 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2542 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2543 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2546 ierr = VecDestroy(&x);CHKERRQ(ierr); 2547 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2548 } 2549 2550 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2551 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2552 if (isseqaij) { 2553 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2554 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2555 } else { 2556 Mat work_mat; 2557 2558 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2559 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2560 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2561 } 2562 if (matis->A->symmetric_set) { 2563 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2564 #if !defined(PETSC_USE_COMPLEX) 2565 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2566 #endif 2567 } 2568 /* 2569 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2570 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2571 */ 2572 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2578 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2579 { 2580 PC_IS* pcis = (PC_IS*)(pc->data); 2581 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2582 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2583 PetscInt *idx_R_local=NULL; 2584 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2585 PetscInt vbs,bs; 2586 PetscBT bitmask=NULL; 2587 PetscErrorCode ierr; 2588 2589 PetscFunctionBegin; 2590 /* 2591 No need to setup local scatters if 2592 - primal space is unchanged 2593 AND 2594 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2595 AND 2596 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2597 */ 2598 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2599 PetscFunctionReturn(0); 2600 } 2601 /* destroy old objects */ 2602 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2603 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2604 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2605 /* Set Non-overlapping dimensions */ 2606 n_B = pcis->n_B; 2607 n_D = pcis->n - n_B; 2608 n_vertices = pcbddc->n_vertices; 2609 2610 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2611 2612 /* create auxiliary bitmask and allocate workspace */ 2613 if (!sub_schurs->reuse_solver) { 2614 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2615 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2616 for (i=0;i<n_vertices;i++) { 2617 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2618 } 2619 2620 for (i=0, n_R=0; i<pcis->n; i++) { 2621 if (!PetscBTLookup(bitmask,i)) { 2622 idx_R_local[n_R++] = i; 2623 } 2624 } 2625 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2627 2628 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2629 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2630 } 2631 2632 /* Block code */ 2633 vbs = 1; 2634 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2635 if (bs>1 && !(n_vertices%bs)) { 2636 PetscBool is_blocked = PETSC_TRUE; 2637 PetscInt *vary; 2638 if (!sub_schurs->reuse_solver) { 2639 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2640 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2641 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2642 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2643 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2644 for (i=0; i<pcis->n/bs; i++) { 2645 if (vary[i]!=0 && vary[i]!=bs) { 2646 is_blocked = PETSC_FALSE; 2647 break; 2648 } 2649 } 2650 ierr = PetscFree(vary);CHKERRQ(ierr); 2651 } else { 2652 /* Verify directly the R set */ 2653 for (i=0; i<n_R/bs; i++) { 2654 PetscInt j,node=idx_R_local[bs*i]; 2655 for (j=1; j<bs; j++) { 2656 if (node != idx_R_local[bs*i+j]-j) { 2657 is_blocked = PETSC_FALSE; 2658 break; 2659 } 2660 } 2661 } 2662 } 2663 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2664 vbs = bs; 2665 for (i=0;i<n_R/vbs;i++) { 2666 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2667 } 2668 } 2669 } 2670 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2671 if (sub_schurs->reuse_solver) { 2672 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2673 2674 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2675 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2676 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2677 reuse_solver->is_R = pcbddc->is_R_local; 2678 } else { 2679 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2680 } 2681 2682 /* print some info if requested */ 2683 if (pcbddc->dbg_flag) { 2684 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2685 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2686 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2690 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2691 } 2692 2693 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2694 if (!sub_schurs->reuse_solver) { 2695 IS is_aux1,is_aux2; 2696 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2697 2698 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2699 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2700 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2701 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2702 for (i=0; i<n_D; i++) { 2703 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2704 } 2705 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2706 for (i=0, j=0; i<n_R; i++) { 2707 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2708 aux_array1[j++] = i; 2709 } 2710 } 2711 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2712 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2713 for (i=0, j=0; i<n_B; i++) { 2714 if (!PetscBTLookup(bitmask,is_indices[i])) { 2715 aux_array2[j++] = i; 2716 } 2717 } 2718 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2719 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2720 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2721 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2722 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2723 2724 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2725 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2726 for (i=0, j=0; i<n_R; i++) { 2727 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2728 aux_array1[j++] = i; 2729 } 2730 } 2731 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2732 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2733 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2734 } 2735 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2736 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2737 } else { 2738 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2739 IS tis; 2740 PetscInt schur_size; 2741 2742 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2743 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2744 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2745 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2747 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2748 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2749 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2750 } 2751 } 2752 PetscFunctionReturn(0); 2753 } 2754 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2758 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2759 { 2760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2761 PC_IS *pcis = (PC_IS*)pc->data; 2762 PC pc_temp; 2763 Mat A_RR; 2764 MatReuse reuse; 2765 PetscScalar m_one = -1.0; 2766 PetscReal value; 2767 PetscInt n_D,n_R; 2768 PetscBool use_exact,use_exact_reduced,issbaij; 2769 PetscErrorCode ierr; 2770 /* prefixes stuff */ 2771 char dir_prefix[256],neu_prefix[256],str_level[16]; 2772 size_t len; 2773 2774 PetscFunctionBegin; 2775 2776 /* compute prefixes */ 2777 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2778 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2779 if (!pcbddc->current_level) { 2780 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2781 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2782 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2783 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2784 } else { 2785 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2786 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2787 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2788 len -= 15; /* remove "pc_bddc_coarse_" */ 2789 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2790 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2791 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2792 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2793 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2794 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2795 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2796 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2797 } 2798 2799 /* DIRICHLET PROBLEM */ 2800 if (dirichlet) { 2801 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2802 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2803 if (!sub_schurs->reuse_solver) { 2804 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2805 } 2806 if (pcbddc->dbg_flag) { 2807 Mat A_IIn; 2808 2809 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2810 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2811 pcis->A_II = A_IIn; 2812 } 2813 } 2814 if (pcbddc->local_mat->symmetric_set) { 2815 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2816 } 2817 /* Matrix for Dirichlet problem is pcis->A_II */ 2818 n_D = pcis->n - pcis->n_B; 2819 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2820 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2821 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2822 /* default */ 2823 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2824 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2825 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2826 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2827 if (issbaij) { 2828 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2829 } else { 2830 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2831 } 2832 /* Allow user's customization */ 2833 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2834 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2835 } 2836 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2837 if (sub_schurs->reuse_solver) { 2838 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2839 2840 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2841 } 2842 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2843 if (!n_D) { 2844 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2845 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2846 } 2847 /* Set Up KSP for Dirichlet problem of BDDC */ 2848 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2849 /* set ksp_D into pcis data */ 2850 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2851 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2852 pcis->ksp_D = pcbddc->ksp_D; 2853 } 2854 2855 /* NEUMANN PROBLEM */ 2856 A_RR = 0; 2857 if (neumann) { 2858 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2859 PetscInt ibs,mbs; 2860 PetscBool issbaij; 2861 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2862 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2863 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2864 if (pcbddc->ksp_R) { /* already created ksp */ 2865 PetscInt nn_R; 2866 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2867 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2868 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2869 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2870 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2871 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2872 reuse = MAT_INITIAL_MATRIX; 2873 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2874 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2875 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2876 reuse = MAT_INITIAL_MATRIX; 2877 } else { /* safe to reuse the matrix */ 2878 reuse = MAT_REUSE_MATRIX; 2879 } 2880 } 2881 /* last check */ 2882 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2883 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2884 reuse = MAT_INITIAL_MATRIX; 2885 } 2886 } else { /* first time, so we need to create the matrix */ 2887 reuse = MAT_INITIAL_MATRIX; 2888 } 2889 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2890 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2891 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2892 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2893 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2894 if (matis->A == pcbddc->local_mat) { 2895 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2896 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2897 } else { 2898 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2899 } 2900 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2901 if (matis->A == pcbddc->local_mat) { 2902 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2903 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2904 } else { 2905 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2906 } 2907 } 2908 /* extract A_RR */ 2909 if (sub_schurs->reuse_solver) { 2910 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2911 2912 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2913 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2914 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2915 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2916 } else { 2917 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2918 } 2919 } else { 2920 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2921 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2922 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2923 } 2924 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2925 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2926 } 2927 if (pcbddc->local_mat->symmetric_set) { 2928 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2929 } 2930 if (!pcbddc->ksp_R) { /* create object if not present */ 2931 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2932 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2933 /* default */ 2934 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2935 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2936 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2937 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2938 if (issbaij) { 2939 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2940 } else { 2941 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2942 } 2943 /* Allow user's customization */ 2944 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2945 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2946 } 2947 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2948 if (!n_R) { 2949 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2950 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2951 } 2952 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2953 /* Reuse solver if it is present */ 2954 if (sub_schurs->reuse_solver) { 2955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2956 2957 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2958 } 2959 /* Set Up KSP for Neumann problem of BDDC */ 2960 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2961 } 2962 2963 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2964 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2965 if (pcbddc->dbg_flag) { 2966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2967 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2969 } 2970 if (dirichlet) { /* Dirichlet */ 2971 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2972 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2973 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2974 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2975 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2976 /* need to be adapted? */ 2977 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2978 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2979 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2980 /* print info */ 2981 if (pcbddc->dbg_flag) { 2982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2984 } 2985 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2986 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2987 } 2988 } 2989 if (neumann) { /* Neumann */ 2990 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2991 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2992 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2993 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2994 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2995 /* need to be adapted? */ 2996 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2997 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2998 /* print info */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3005 } 3006 } 3007 } 3008 /* free Neumann problem's matrix */ 3009 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3015 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3016 { 3017 PetscErrorCode ierr; 3018 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3019 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3020 3021 PetscFunctionBegin; 3022 if (!sub_schurs->reuse_solver) { 3023 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3024 } 3025 if (!pcbddc->switch_static) { 3026 if (applytranspose && pcbddc->local_auxmat1) { 3027 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3028 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3029 } 3030 if (!sub_schurs->reuse_solver) { 3031 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3033 } else { 3034 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3035 3036 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3037 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3038 } 3039 } else { 3040 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3041 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3042 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3043 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3044 if (applytranspose && pcbddc->local_auxmat1) { 3045 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3047 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3049 } 3050 } 3051 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3052 if (applytranspose) { 3053 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3054 } else { 3055 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3056 } 3057 } else { 3058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3059 3060 if (applytranspose) { 3061 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3062 } else { 3063 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3064 } 3065 } 3066 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3067 if (!pcbddc->switch_static) { 3068 if (!sub_schurs->reuse_solver) { 3069 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3070 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3071 } else { 3072 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3073 3074 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3075 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3076 } 3077 if (!applytranspose && pcbddc->local_auxmat1) { 3078 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3079 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3080 } 3081 } else { 3082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3086 if (!applytranspose && pcbddc->local_auxmat1) { 3087 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3088 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3089 } 3090 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3091 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3092 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3101 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3102 { 3103 PetscErrorCode ierr; 3104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3105 PC_IS* pcis = (PC_IS*) (pc->data); 3106 const PetscScalar zero = 0.0; 3107 3108 PetscFunctionBegin; 3109 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3110 if (!pcbddc->benign_apply_coarse_only) { 3111 if (applytranspose) { 3112 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3113 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3114 } else { 3115 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3116 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3117 } 3118 } else { 3119 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3120 } 3121 3122 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3123 if (pcbddc->benign_n) { 3124 PetscScalar *array; 3125 PetscInt j; 3126 3127 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3128 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3129 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3130 } 3131 3132 /* start communications from local primal nodes to rhs of coarse solver */ 3133 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3134 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3135 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3136 3137 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3138 /* TODO remove null space when doing multilevel */ 3139 if (pcbddc->coarse_ksp) { 3140 Mat coarse_mat; 3141 Vec rhs,sol; 3142 MatNullSpace nullsp; 3143 3144 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3145 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3146 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3147 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3148 if (nullsp) { 3149 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3150 } 3151 if (applytranspose) { 3152 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3153 } else { 3154 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3155 } 3156 if (nullsp) { 3157 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3158 } 3159 } 3160 3161 /* Local solution on R nodes */ 3162 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3163 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3164 } 3165 /* communications from coarse sol to local primal nodes */ 3166 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3168 3169 /* Sum contributions from the two levels */ 3170 if (!pcbddc->benign_apply_coarse_only) { 3171 if (applytranspose) { 3172 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3173 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3174 } else { 3175 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3176 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3177 } 3178 /* store p0 */ 3179 if (pcbddc->benign_n) { 3180 PetscScalar *array; 3181 PetscInt j; 3182 3183 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3184 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3185 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3186 } 3187 } else { /* expand the coarse solution */ 3188 if (applytranspose) { 3189 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3190 } else { 3191 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3192 } 3193 } 3194 PetscFunctionReturn(0); 3195 } 3196 3197 #undef __FUNCT__ 3198 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3199 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3200 { 3201 PetscErrorCode ierr; 3202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3203 PetscScalar *array; 3204 Vec from,to; 3205 3206 PetscFunctionBegin; 3207 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3208 from = pcbddc->coarse_vec; 3209 to = pcbddc->vec1_P; 3210 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3211 Vec tvec; 3212 3213 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3214 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3215 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3216 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3217 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3219 } 3220 } else { /* from local to global -> put data in coarse right hand side */ 3221 from = pcbddc->vec1_P; 3222 to = pcbddc->coarse_vec; 3223 } 3224 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3230 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3231 { 3232 PetscErrorCode ierr; 3233 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3234 PetscScalar *array; 3235 Vec from,to; 3236 3237 PetscFunctionBegin; 3238 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3239 from = pcbddc->coarse_vec; 3240 to = pcbddc->vec1_P; 3241 } else { /* from local to global -> put data in coarse right hand side */ 3242 from = pcbddc->vec1_P; 3243 to = pcbddc->coarse_vec; 3244 } 3245 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3246 if (smode == SCATTER_FORWARD) { 3247 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3248 Vec tvec; 3249 3250 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3251 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3252 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3253 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3254 } 3255 } else { 3256 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3257 ierr = VecResetArray(from);CHKERRQ(ierr); 3258 } 3259 } 3260 PetscFunctionReturn(0); 3261 } 3262 3263 /* uncomment for testing purposes */ 3264 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3267 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3268 { 3269 PetscErrorCode ierr; 3270 PC_IS* pcis = (PC_IS*)(pc->data); 3271 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3272 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3273 /* one and zero */ 3274 PetscScalar one=1.0,zero=0.0; 3275 /* space to store constraints and their local indices */ 3276 PetscScalar *constraints_data; 3277 PetscInt *constraints_idxs,*constraints_idxs_B; 3278 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3279 PetscInt *constraints_n; 3280 /* iterators */ 3281 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3282 /* BLAS integers */ 3283 PetscBLASInt lwork,lierr; 3284 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3285 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3286 /* reuse */ 3287 PetscInt olocal_primal_size,olocal_primal_size_cc; 3288 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3289 /* change of basis */ 3290 PetscBool qr_needed; 3291 PetscBT change_basis,qr_needed_idx; 3292 /* auxiliary stuff */ 3293 PetscInt *nnz,*is_indices; 3294 PetscInt ncc; 3295 /* some quantities */ 3296 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3297 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3298 3299 PetscFunctionBegin; 3300 /* Destroy Mat objects computed previously */ 3301 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3302 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3303 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3304 /* save info on constraints from previous setup (if any) */ 3305 olocal_primal_size = pcbddc->local_primal_size; 3306 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3307 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3308 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3309 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3310 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3311 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3312 3313 /* print some info */ 3314 if (pcbddc->dbg_flag) { 3315 IS vertices; 3316 PetscInt nv,nedges,nfaces; 3317 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3318 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3319 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3320 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3326 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3327 } 3328 3329 if (!pcbddc->adaptive_selection) { 3330 IS ISForVertices,*ISForFaces,*ISForEdges; 3331 MatNullSpace nearnullsp; 3332 const Vec *nearnullvecs; 3333 Vec *localnearnullsp; 3334 PetscScalar *array; 3335 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3336 PetscBool nnsp_has_cnst; 3337 /* LAPACK working arrays for SVD or POD */ 3338 PetscBool skip_lapack,boolforchange; 3339 PetscScalar *work; 3340 PetscReal *singular_vals; 3341 #if defined(PETSC_USE_COMPLEX) 3342 PetscReal *rwork; 3343 #endif 3344 #if defined(PETSC_MISSING_LAPACK_GESVD) 3345 PetscScalar *temp_basis,*correlation_mat; 3346 #else 3347 PetscBLASInt dummy_int=1; 3348 PetscScalar dummy_scalar=1.; 3349 #endif 3350 3351 /* Get index sets for faces, edges and vertices from graph */ 3352 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3353 /* free unneeded index sets */ 3354 if (!pcbddc->use_vertices) { 3355 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3356 } 3357 if (!pcbddc->use_edges) { 3358 for (i=0;i<n_ISForEdges;i++) { 3359 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3360 } 3361 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3362 n_ISForEdges = 0; 3363 } 3364 if (!pcbddc->use_faces) { 3365 for (i=0;i<n_ISForFaces;i++) { 3366 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3367 } 3368 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3369 n_ISForFaces = 0; 3370 } 3371 3372 #if defined(PETSC_USE_DEBUG) 3373 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3374 Also use_change_of_basis should be consistent among processors */ 3375 if (pcbddc->NullSpace) { 3376 PetscBool tbool[2],gbool[2]; 3377 3378 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3379 pcbddc->use_change_of_basis = PETSC_TRUE; 3380 if (!ISForEdges) { 3381 pcbddc->use_change_on_faces = PETSC_TRUE; 3382 } 3383 } 3384 tbool[0] = pcbddc->use_change_of_basis; 3385 tbool[1] = pcbddc->use_change_on_faces; 3386 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3387 pcbddc->use_change_of_basis = gbool[0]; 3388 pcbddc->use_change_on_faces = gbool[1]; 3389 } 3390 #endif 3391 3392 /* check if near null space is attached to global mat */ 3393 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3394 if (nearnullsp) { 3395 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3396 /* remove any stored info */ 3397 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3398 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3399 /* store information for BDDC solver reuse */ 3400 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3401 pcbddc->onearnullspace = nearnullsp; 3402 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3403 for (i=0;i<nnsp_size;i++) { 3404 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3405 } 3406 } else { /* if near null space is not provided BDDC uses constants by default */ 3407 nnsp_size = 0; 3408 nnsp_has_cnst = PETSC_TRUE; 3409 } 3410 /* get max number of constraints on a single cc */ 3411 max_constraints = nnsp_size; 3412 if (nnsp_has_cnst) max_constraints++; 3413 3414 /* 3415 Evaluate maximum storage size needed by the procedure 3416 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3417 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3418 There can be multiple constraints per connected component 3419 */ 3420 n_vertices = 0; 3421 if (ISForVertices) { 3422 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3423 } 3424 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3425 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3426 3427 total_counts = n_ISForFaces+n_ISForEdges; 3428 total_counts *= max_constraints; 3429 total_counts += n_vertices; 3430 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3431 3432 total_counts = 0; 3433 max_size_of_constraint = 0; 3434 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3435 IS used_is; 3436 if (i<n_ISForEdges) { 3437 used_is = ISForEdges[i]; 3438 } else { 3439 used_is = ISForFaces[i-n_ISForEdges]; 3440 } 3441 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3442 total_counts += j; 3443 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3444 } 3445 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3446 3447 /* get local part of global near null space vectors */ 3448 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3449 for (k=0;k<nnsp_size;k++) { 3450 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3451 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3452 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3453 } 3454 3455 /* whether or not to skip lapack calls */ 3456 skip_lapack = PETSC_TRUE; 3457 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3458 3459 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3460 if (!skip_lapack) { 3461 PetscScalar temp_work; 3462 3463 #if defined(PETSC_MISSING_LAPACK_GESVD) 3464 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3465 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3466 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3467 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3468 #if defined(PETSC_USE_COMPLEX) 3469 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3470 #endif 3471 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3472 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3473 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3474 lwork = -1; 3475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3476 #if !defined(PETSC_USE_COMPLEX) 3477 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3478 #else 3479 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3480 #endif 3481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3483 #else /* on missing GESVD */ 3484 /* SVD */ 3485 PetscInt max_n,min_n; 3486 max_n = max_size_of_constraint; 3487 min_n = max_constraints; 3488 if (max_size_of_constraint < max_constraints) { 3489 min_n = max_size_of_constraint; 3490 max_n = max_constraints; 3491 } 3492 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3493 #if defined(PETSC_USE_COMPLEX) 3494 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3495 #endif 3496 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3497 lwork = -1; 3498 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3499 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3500 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3502 #if !defined(PETSC_USE_COMPLEX) 3503 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3504 #else 3505 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3506 #endif 3507 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3508 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3509 #endif /* on missing GESVD */ 3510 /* Allocate optimal workspace */ 3511 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3512 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3513 } 3514 /* Now we can loop on constraining sets */ 3515 total_counts = 0; 3516 constraints_idxs_ptr[0] = 0; 3517 constraints_data_ptr[0] = 0; 3518 /* vertices */ 3519 if (n_vertices) { 3520 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3521 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3522 for (i=0;i<n_vertices;i++) { 3523 constraints_n[total_counts] = 1; 3524 constraints_data[total_counts] = 1.0; 3525 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3526 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3527 total_counts++; 3528 } 3529 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3530 n_vertices = total_counts; 3531 } 3532 3533 /* edges and faces */ 3534 total_counts_cc = total_counts; 3535 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3536 IS used_is; 3537 PetscBool idxs_copied = PETSC_FALSE; 3538 3539 if (ncc<n_ISForEdges) { 3540 used_is = ISForEdges[ncc]; 3541 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3542 } else { 3543 used_is = ISForFaces[ncc-n_ISForEdges]; 3544 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3545 } 3546 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3547 3548 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3549 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3550 /* change of basis should not be performed on local periodic nodes */ 3551 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3552 if (nnsp_has_cnst) { 3553 PetscScalar quad_value; 3554 3555 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3556 idxs_copied = PETSC_TRUE; 3557 3558 if (!pcbddc->use_nnsp_true) { 3559 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3560 } else { 3561 quad_value = 1.0; 3562 } 3563 for (j=0;j<size_of_constraint;j++) { 3564 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3565 } 3566 temp_constraints++; 3567 total_counts++; 3568 } 3569 for (k=0;k<nnsp_size;k++) { 3570 PetscReal real_value; 3571 PetscScalar *ptr_to_data; 3572 3573 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3574 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3575 for (j=0;j<size_of_constraint;j++) { 3576 ptr_to_data[j] = array[is_indices[j]]; 3577 } 3578 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3579 /* check if array is null on the connected component */ 3580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3581 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3582 if (real_value > 0.0) { /* keep indices and values */ 3583 temp_constraints++; 3584 total_counts++; 3585 if (!idxs_copied) { 3586 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3587 idxs_copied = PETSC_TRUE; 3588 } 3589 } 3590 } 3591 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3592 valid_constraints = temp_constraints; 3593 if (!pcbddc->use_nnsp_true && temp_constraints) { 3594 if (temp_constraints == 1) { /* just normalize the constraint */ 3595 PetscScalar norm,*ptr_to_data; 3596 3597 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3599 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3600 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3601 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3602 } else { /* perform SVD */ 3603 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3604 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3605 3606 #if defined(PETSC_MISSING_LAPACK_GESVD) 3607 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3608 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3609 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3610 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3611 from that computed using LAPACKgesvd 3612 -> This is due to a different computation of eigenvectors in LAPACKheev 3613 -> The quality of the POD-computed basis will be the same */ 3614 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3615 /* Store upper triangular part of correlation matrix */ 3616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3617 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3618 for (j=0;j<temp_constraints;j++) { 3619 for (k=0;k<j+1;k++) { 3620 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3621 } 3622 } 3623 /* compute eigenvalues and eigenvectors of correlation matrix */ 3624 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3625 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3626 #if !defined(PETSC_USE_COMPLEX) 3627 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3628 #else 3629 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3630 #endif 3631 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3632 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3633 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3634 j = 0; 3635 while (j < temp_constraints && singular_vals[j] < tol) j++; 3636 total_counts = total_counts-j; 3637 valid_constraints = temp_constraints-j; 3638 /* scale and copy POD basis into used quadrature memory */ 3639 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3640 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3641 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3642 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3643 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3644 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3645 if (j<temp_constraints) { 3646 PetscInt ii; 3647 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3648 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3649 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3650 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3651 for (k=0;k<temp_constraints-j;k++) { 3652 for (ii=0;ii<size_of_constraint;ii++) { 3653 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3654 } 3655 } 3656 } 3657 #else /* on missing GESVD */ 3658 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3659 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3660 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3661 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3662 #if !defined(PETSC_USE_COMPLEX) 3663 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3664 #else 3665 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3666 #endif 3667 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3668 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3669 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3670 k = temp_constraints; 3671 if (k > size_of_constraint) k = size_of_constraint; 3672 j = 0; 3673 while (j < k && singular_vals[k-j-1] < tol) j++; 3674 valid_constraints = k-j; 3675 total_counts = total_counts-temp_constraints+valid_constraints; 3676 #endif /* on missing GESVD */ 3677 } 3678 } 3679 /* update pointers information */ 3680 if (valid_constraints) { 3681 constraints_n[total_counts_cc] = valid_constraints; 3682 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3683 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3684 /* set change_of_basis flag */ 3685 if (boolforchange) { 3686 PetscBTSet(change_basis,total_counts_cc); 3687 } 3688 total_counts_cc++; 3689 } 3690 } 3691 /* free workspace */ 3692 if (!skip_lapack) { 3693 ierr = PetscFree(work);CHKERRQ(ierr); 3694 #if defined(PETSC_USE_COMPLEX) 3695 ierr = PetscFree(rwork);CHKERRQ(ierr); 3696 #endif 3697 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3698 #if defined(PETSC_MISSING_LAPACK_GESVD) 3699 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3700 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3701 #endif 3702 } 3703 for (k=0;k<nnsp_size;k++) { 3704 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3705 } 3706 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3707 /* free index sets of faces, edges and vertices */ 3708 for (i=0;i<n_ISForFaces;i++) { 3709 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3710 } 3711 if (n_ISForFaces) { 3712 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3713 } 3714 for (i=0;i<n_ISForEdges;i++) { 3715 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3716 } 3717 if (n_ISForEdges) { 3718 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3719 } 3720 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3721 } else { 3722 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3723 3724 total_counts = 0; 3725 n_vertices = 0; 3726 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3727 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3728 } 3729 max_constraints = 0; 3730 total_counts_cc = 0; 3731 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3732 total_counts += pcbddc->adaptive_constraints_n[i]; 3733 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3734 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3735 } 3736 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3737 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3738 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3739 constraints_data = pcbddc->adaptive_constraints_data; 3740 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3741 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3742 total_counts_cc = 0; 3743 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3744 if (pcbddc->adaptive_constraints_n[i]) { 3745 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3746 } 3747 } 3748 #if 0 3749 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3750 for (i=0;i<total_counts_cc;i++) { 3751 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3752 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3753 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3754 printf(" %d",constraints_idxs[j]); 3755 } 3756 printf("\n"); 3757 printf("number of cc: %d\n",constraints_n[i]); 3758 } 3759 for (i=0;i<n_vertices;i++) { 3760 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3761 } 3762 for (i=0;i<sub_schurs->n_subs;i++) { 3763 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3764 } 3765 #endif 3766 3767 max_size_of_constraint = 0; 3768 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3769 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3770 /* Change of basis */ 3771 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3772 if (pcbddc->use_change_of_basis) { 3773 for (i=0;i<sub_schurs->n_subs;i++) { 3774 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3775 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3776 } 3777 } 3778 } 3779 } 3780 pcbddc->local_primal_size = total_counts; 3781 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3782 3783 /* map constraints_idxs in boundary numbering */ 3784 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3785 if (i != constraints_idxs_ptr[total_counts_cc]) { 3786 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3787 } 3788 3789 /* Create constraint matrix */ 3790 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3791 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3792 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3793 3794 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3795 /* determine if a QR strategy is needed for change of basis */ 3796 qr_needed = PETSC_FALSE; 3797 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3798 total_primal_vertices=0; 3799 pcbddc->local_primal_size_cc = 0; 3800 for (i=0;i<total_counts_cc;i++) { 3801 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3802 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3803 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3804 pcbddc->local_primal_size_cc += 1; 3805 } else if (PetscBTLookup(change_basis,i)) { 3806 for (k=0;k<constraints_n[i];k++) { 3807 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3808 } 3809 pcbddc->local_primal_size_cc += constraints_n[i]; 3810 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3811 PetscBTSet(qr_needed_idx,i); 3812 qr_needed = PETSC_TRUE; 3813 } 3814 } else { 3815 pcbddc->local_primal_size_cc += 1; 3816 } 3817 } 3818 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3819 pcbddc->n_vertices = total_primal_vertices; 3820 /* permute indices in order to have a sorted set of vertices */ 3821 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3822 3823 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3824 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3825 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3826 3827 /* nonzero structure of constraint matrix */ 3828 /* and get reference dof for local constraints */ 3829 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3830 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3831 3832 j = total_primal_vertices; 3833 total_counts = total_primal_vertices; 3834 cum = total_primal_vertices; 3835 for (i=n_vertices;i<total_counts_cc;i++) { 3836 if (!PetscBTLookup(change_basis,i)) { 3837 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3838 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3839 cum++; 3840 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3841 for (k=0;k<constraints_n[i];k++) { 3842 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3843 nnz[j+k] = size_of_constraint; 3844 } 3845 j += constraints_n[i]; 3846 } 3847 } 3848 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3849 ierr = PetscFree(nnz);CHKERRQ(ierr); 3850 3851 /* set values in constraint matrix */ 3852 for (i=0;i<total_primal_vertices;i++) { 3853 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3854 } 3855 total_counts = total_primal_vertices; 3856 for (i=n_vertices;i<total_counts_cc;i++) { 3857 if (!PetscBTLookup(change_basis,i)) { 3858 PetscInt *cols; 3859 3860 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3861 cols = constraints_idxs+constraints_idxs_ptr[i]; 3862 for (k=0;k<constraints_n[i];k++) { 3863 PetscInt row = total_counts+k; 3864 PetscScalar *vals; 3865 3866 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3867 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3868 } 3869 total_counts += constraints_n[i]; 3870 } 3871 } 3872 /* assembling */ 3873 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3875 3876 /* 3877 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3878 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3879 */ 3880 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3881 if (pcbddc->use_change_of_basis) { 3882 /* dual and primal dofs on a single cc */ 3883 PetscInt dual_dofs,primal_dofs; 3884 /* working stuff for GEQRF */ 3885 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3886 PetscBLASInt lqr_work; 3887 /* working stuff for UNGQR */ 3888 PetscScalar *gqr_work,lgqr_work_t; 3889 PetscBLASInt lgqr_work; 3890 /* working stuff for TRTRS */ 3891 PetscScalar *trs_rhs; 3892 PetscBLASInt Blas_NRHS; 3893 /* pointers for values insertion into change of basis matrix */ 3894 PetscInt *start_rows,*start_cols; 3895 PetscScalar *start_vals; 3896 /* working stuff for values insertion */ 3897 PetscBT is_primal; 3898 PetscInt *aux_primal_numbering_B; 3899 /* matrix sizes */ 3900 PetscInt global_size,local_size; 3901 /* temporary change of basis */ 3902 Mat localChangeOfBasisMatrix; 3903 /* extra space for debugging */ 3904 PetscScalar *dbg_work; 3905 3906 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3907 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3908 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3909 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3910 /* nonzeros for local mat */ 3911 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3912 if (!pcbddc->benign_change || pcbddc->fake_change) { 3913 for (i=0;i<pcis->n;i++) nnz[i]=1; 3914 } else { 3915 const PetscInt *ii; 3916 PetscInt n; 3917 PetscBool flg_row; 3918 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3919 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3920 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3921 } 3922 for (i=n_vertices;i<total_counts_cc;i++) { 3923 if (PetscBTLookup(change_basis,i)) { 3924 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3925 if (PetscBTLookup(qr_needed_idx,i)) { 3926 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3927 } else { 3928 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3929 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3930 } 3931 } 3932 } 3933 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3934 ierr = PetscFree(nnz);CHKERRQ(ierr); 3935 /* Set interior change in the matrix */ 3936 if (!pcbddc->benign_change || pcbddc->fake_change) { 3937 for (i=0;i<pcis->n;i++) { 3938 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3939 } 3940 } else { 3941 const PetscInt *ii,*jj; 3942 PetscScalar *aa; 3943 PetscInt n; 3944 PetscBool flg_row; 3945 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3946 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3947 for (i=0;i<n;i++) { 3948 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3949 } 3950 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3951 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3952 } 3953 3954 if (pcbddc->dbg_flag) { 3955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3957 } 3958 3959 3960 /* Now we loop on the constraints which need a change of basis */ 3961 /* 3962 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3963 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3964 3965 Basic blocks of change of basis matrix T computed by 3966 3967 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3968 3969 | 1 0 ... 0 s_1/S | 3970 | 0 1 ... 0 s_2/S | 3971 | ... | 3972 | 0 ... 1 s_{n-1}/S | 3973 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3974 3975 with S = \sum_{i=1}^n s_i^2 3976 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3977 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3978 3979 - QR decomposition of constraints otherwise 3980 */ 3981 if (qr_needed) { 3982 /* space to store Q */ 3983 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3984 /* first we issue queries for optimal work */ 3985 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3986 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3988 lqr_work = -1; 3989 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3990 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3991 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3992 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3993 lgqr_work = -1; 3994 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3995 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3996 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3997 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3998 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3999 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4000 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4001 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4002 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4003 /* array to store scaling factors for reflectors */ 4004 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4005 /* array to store rhs and solution of triangular solver */ 4006 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4007 /* allocating workspace for check */ 4008 if (pcbddc->dbg_flag) { 4009 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4010 } 4011 } 4012 /* array to store whether a node is primal or not */ 4013 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4014 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4015 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4016 if (i != total_primal_vertices) { 4017 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 4018 } 4019 for (i=0;i<total_primal_vertices;i++) { 4020 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4021 } 4022 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4023 4024 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4025 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4026 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4027 if (PetscBTLookup(change_basis,total_counts)) { 4028 /* get constraint info */ 4029 primal_dofs = constraints_n[total_counts]; 4030 dual_dofs = size_of_constraint-primal_dofs; 4031 4032 if (pcbddc->dbg_flag) { 4033 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4034 } 4035 4036 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4037 4038 /* copy quadrature constraints for change of basis check */ 4039 if (pcbddc->dbg_flag) { 4040 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4041 } 4042 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4043 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4044 4045 /* compute QR decomposition of constraints */ 4046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4047 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4050 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4051 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4052 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4053 4054 /* explictly compute R^-T */ 4055 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4056 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4057 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4060 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4062 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4063 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4064 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4065 4066 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4067 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4069 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4071 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4072 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4073 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4074 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4075 4076 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4077 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4078 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4080 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4081 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4083 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4085 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4086 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4087 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4088 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4089 4090 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4091 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4092 /* insert cols for primal dofs */ 4093 for (j=0;j<primal_dofs;j++) { 4094 start_vals = &qr_basis[j*size_of_constraint]; 4095 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4096 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4097 } 4098 /* insert cols for dual dofs */ 4099 for (j=0,k=0;j<dual_dofs;k++) { 4100 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4101 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4102 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4103 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4104 j++; 4105 } 4106 } 4107 4108 /* check change of basis */ 4109 if (pcbddc->dbg_flag) { 4110 PetscInt ii,jj; 4111 PetscBool valid_qr=PETSC_TRUE; 4112 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4115 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4117 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4118 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4119 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4120 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4121 for (jj=0;jj<size_of_constraint;jj++) { 4122 for (ii=0;ii<primal_dofs;ii++) { 4123 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4124 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4125 } 4126 } 4127 if (!valid_qr) { 4128 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4129 for (jj=0;jj<size_of_constraint;jj++) { 4130 for (ii=0;ii<primal_dofs;ii++) { 4131 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4132 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4133 } 4134 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4135 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4136 } 4137 } 4138 } 4139 } else { 4140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4141 } 4142 } 4143 } else { /* simple transformation block */ 4144 PetscInt row,col; 4145 PetscScalar val,norm; 4146 4147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4148 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4149 for (j=0;j<size_of_constraint;j++) { 4150 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4151 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4152 if (!PetscBTLookup(is_primal,row_B)) { 4153 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4154 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4155 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4156 } else { 4157 for (k=0;k<size_of_constraint;k++) { 4158 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4159 if (row != col) { 4160 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4161 } else { 4162 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4163 } 4164 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4165 } 4166 } 4167 } 4168 if (pcbddc->dbg_flag) { 4169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4170 } 4171 } 4172 } else { 4173 if (pcbddc->dbg_flag) { 4174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4175 } 4176 } 4177 } 4178 4179 /* free workspace */ 4180 if (qr_needed) { 4181 if (pcbddc->dbg_flag) { 4182 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4183 } 4184 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4185 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4186 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4187 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4188 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4189 } 4190 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4191 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4192 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4193 4194 /* assembling of global change of variable */ 4195 if (!pcbddc->fake_change) { 4196 Mat tmat; 4197 PetscInt bs; 4198 4199 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4200 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4201 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4202 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4203 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4204 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4205 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4206 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4207 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4208 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4209 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4210 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4211 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4212 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4213 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4214 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4215 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4216 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4217 4218 /* check */ 4219 if (pcbddc->dbg_flag) { 4220 PetscReal error; 4221 Vec x,x_change; 4222 4223 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4224 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4225 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4226 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4227 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4228 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4230 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4231 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4232 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4233 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4234 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4237 ierr = VecDestroy(&x);CHKERRQ(ierr); 4238 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4239 } 4240 /* adapt sub_schurs computed (if any) */ 4241 if (pcbddc->use_deluxe_scaling) { 4242 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4243 4244 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4245 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4246 } 4247 if (sub_schurs->S_Ej_all) { 4248 Mat S_new,tmat; 4249 IS is_all_N,is_V_Sall = NULL; 4250 4251 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4252 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4253 if (pcbddc->deluxe_zerorows) { 4254 ISLocalToGlobalMapping NtoSall; 4255 IS is_V; 4256 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4257 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4258 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4259 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4260 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4261 } 4262 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4263 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4264 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4265 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4266 if (pcbddc->deluxe_zerorows) { 4267 const PetscScalar *array; 4268 const PetscInt *idxs_V,*idxs_all; 4269 PetscInt i,n_V; 4270 4271 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4272 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4273 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4274 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4275 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4276 for (i=0;i<n_V;i++) { 4277 PetscScalar val; 4278 PetscInt idx; 4279 4280 idx = idxs_V[i]; 4281 val = array[idxs_all[idxs_V[i]]]; 4282 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4283 } 4284 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4285 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4286 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4287 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4288 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4289 } 4290 sub_schurs->S_Ej_all = S_new; 4291 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4292 if (sub_schurs->sum_S_Ej_all) { 4293 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4294 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4295 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4296 if (pcbddc->deluxe_zerorows) { 4297 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4298 } 4299 sub_schurs->sum_S_Ej_all = S_new; 4300 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4301 } 4302 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4303 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4304 } 4305 /* destroy any change of basis context in sub_schurs */ 4306 if (sub_schurs->change) { 4307 PetscInt i; 4308 4309 for (i=0;i<sub_schurs->n_subs;i++) { 4310 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4311 } 4312 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4313 } 4314 } 4315 if (pcbddc->switch_static) { /* need to save the local change */ 4316 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4317 } else { 4318 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4319 } 4320 /* determine if any process has changed the pressures locally */ 4321 if (pcbddc->benign_saddle_point) { 4322 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4323 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4324 } 4325 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4326 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4327 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4328 pcbddc->use_qr_single = qr_needed; 4329 } 4330 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4331 PetscBool needglobal = PETSC_FALSE; 4332 if (pcbddc->benign_saddle_point) { 4333 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4334 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4335 } 4336 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4337 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4338 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4339 } else { 4340 Mat benign_global = NULL; 4341 if (needglobal) { 4342 Mat tmat; 4343 4344 pcbddc->change_interior = PETSC_TRUE; 4345 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4346 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4347 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4348 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4349 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4350 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4351 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4352 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4353 if (pcbddc->benign_change) { 4354 Mat M; 4355 4356 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4357 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4358 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4359 ierr = MatDestroy(&M);CHKERRQ(ierr); 4360 } else { 4361 Mat eye; 4362 PetscScalar *array; 4363 4364 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4365 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4366 for (i=0;i<pcis->n;i++) { 4367 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4368 } 4369 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4370 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4371 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4372 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4373 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4374 } 4375 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4376 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4377 } 4378 if (pcbddc->user_ChangeOfBasisMatrix) { 4379 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4380 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4381 } else if (needglobal) { 4382 pcbddc->ChangeOfBasisMatrix = benign_global; 4383 } 4384 } 4385 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4386 IS is_global; 4387 const PetscInt *gidxs; 4388 4389 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4390 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4391 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4392 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4393 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4394 } 4395 } 4396 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4397 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4398 } 4399 4400 if (!pcbddc->fake_change) { 4401 /* add pressure dofs to set of primal nodes for numbering purposes */ 4402 for (i=0;i<pcbddc->benign_n;i++) { 4403 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4404 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4405 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4406 pcbddc->local_primal_size_cc++; 4407 pcbddc->local_primal_size++; 4408 } 4409 4410 /* check if a new primal space has been introduced (also take into account benign trick) */ 4411 pcbddc->new_primal_space_local = PETSC_TRUE; 4412 if (olocal_primal_size == pcbddc->local_primal_size) { 4413 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4414 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4415 if (!pcbddc->new_primal_space_local) { 4416 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4417 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4418 } 4419 } 4420 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4421 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4422 } 4423 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4424 4425 /* flush dbg viewer */ 4426 if (pcbddc->dbg_flag) { 4427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4428 } 4429 4430 /* free workspace */ 4431 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4432 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4433 if (!pcbddc->adaptive_selection) { 4434 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4435 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4436 } else { 4437 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4438 pcbddc->adaptive_constraints_idxs_ptr, 4439 pcbddc->adaptive_constraints_data_ptr, 4440 pcbddc->adaptive_constraints_idxs, 4441 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4442 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4443 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4444 } 4445 PetscFunctionReturn(0); 4446 } 4447 4448 #undef __FUNCT__ 4449 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4450 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4451 { 4452 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4453 PC_IS *pcis = (PC_IS*)pc->data; 4454 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4455 PetscInt ierr,i,N; 4456 PetscViewer viewer=pcbddc->dbg_viewer; 4457 4458 PetscFunctionBegin; 4459 /* Reset previously computed graph */ 4460 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4461 /* Init local Graph struct */ 4462 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4463 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4464 4465 /* Check validity of the csr graph passed in by the user */ 4466 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4467 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4468 } 4469 4470 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4471 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4472 PetscInt *xadj,*adjncy; 4473 PetscInt nvtxs; 4474 PetscBool flg_row=PETSC_FALSE; 4475 4476 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4477 if (flg_row) { 4478 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4479 pcbddc->computed_rowadj = PETSC_TRUE; 4480 } 4481 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4482 } 4483 if (pcbddc->dbg_flag) { 4484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4485 } 4486 4487 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4488 if (pcbddc->user_provided_isfordofs) { 4489 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4490 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4491 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4492 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4493 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4494 } 4495 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4496 pcbddc->n_ISForDofs = 0; 4497 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4498 } 4499 } else { 4500 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4501 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4502 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4503 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4504 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4505 } 4506 } 4507 } 4508 4509 /* Setup of Graph */ 4510 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4511 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4512 } 4513 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4514 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4515 } 4516 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4517 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4518 } 4519 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4520 4521 /* attach info on disconnected subdomains if present */ 4522 if (pcbddc->n_local_subs) { 4523 PetscInt *local_subs; 4524 4525 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4526 for (i=0;i<pcbddc->n_local_subs;i++) { 4527 const PetscInt *idxs; 4528 PetscInt nl,j; 4529 4530 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4531 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4532 for (j=0;j<nl;j++) { 4533 local_subs[idxs[j]] = i; 4534 } 4535 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4536 } 4537 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4538 pcbddc->mat_graph->local_subs = local_subs; 4539 } 4540 4541 /* Graph's connected components analysis */ 4542 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4543 4544 /* print some info to stdout */ 4545 if (pcbddc->dbg_flag) { 4546 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4547 } 4548 PetscFunctionReturn(0); 4549 } 4550 4551 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4552 #undef __FUNCT__ 4553 #define __FUNCT__ "PCBDDCSubsetNumbering" 4554 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4555 { 4556 PetscSF sf; 4557 PetscLayout map; 4558 const PetscInt *idxs; 4559 PetscInt *leaf_data,*root_data,*gidxs; 4560 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4561 PetscInt n_n,nlocals,start,first_index; 4562 PetscMPIInt commsize; 4563 PetscBool first_found; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4568 if (subset_mult) { 4569 PetscCheckSameComm(subset,1,subset_mult,2); 4570 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4571 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4572 } 4573 /* create workspace layout for computing global indices of subset */ 4574 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4575 lbounds[0] = lbounds[1] = 0; 4576 for (i=0;i<n;i++) { 4577 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4578 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4579 } 4580 lbounds[0] = -lbounds[0]; 4581 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4582 gbounds[0] = -gbounds[0]; 4583 N = gbounds[1] - gbounds[0] + 1; 4584 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4585 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4586 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4587 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4588 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4589 4590 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4591 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4592 if (subset_mult) { 4593 const PetscInt* idxs_mult; 4594 4595 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4596 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4597 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4598 } else { 4599 for (i=0;i<n;i++) leaf_data[i] = 1; 4600 } 4601 /* local size of new subset */ 4602 n_n = 0; 4603 for (i=0;i<n;i++) n_n += leaf_data[i]; 4604 4605 /* global indexes in layout */ 4606 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4607 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4608 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4609 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4610 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4611 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4612 4613 /* reduce from leaves to roots */ 4614 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4615 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4616 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4617 4618 /* count indexes in local part of layout */ 4619 nlocals = 0; 4620 first_index = -1; 4621 first_found = PETSC_FALSE; 4622 for (i=0;i<Nl;i++) { 4623 if (!first_found && root_data[i]) { 4624 first_found = PETSC_TRUE; 4625 first_index = i; 4626 } 4627 nlocals += root_data[i]; 4628 } 4629 4630 /* cumulative of number of indexes and size of subset without holes */ 4631 #if defined(PETSC_HAVE_MPI_EXSCAN) 4632 start = 0; 4633 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4634 #else 4635 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4636 start = start-nlocals; 4637 #endif 4638 4639 if (N_n) { /* compute total size of new subset if requested */ 4640 *N_n = start + nlocals; 4641 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4642 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4643 } 4644 4645 /* adapt root data with cumulative */ 4646 if (first_found) { 4647 PetscInt old_index; 4648 4649 root_data[first_index] += start; 4650 old_index = first_index; 4651 for (i=first_index+1;i<Nl;i++) { 4652 if (root_data[i]) { 4653 root_data[i] += root_data[old_index]; 4654 old_index = i; 4655 } 4656 } 4657 } 4658 4659 /* from roots to leaves */ 4660 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4661 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4663 4664 /* create new IS with global indexes without holes */ 4665 if (subset_mult) { 4666 const PetscInt* idxs_mult; 4667 PetscInt cum; 4668 4669 cum = 0; 4670 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4671 for (i=0;i<n;i++) { 4672 PetscInt j; 4673 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4674 } 4675 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4676 } else { 4677 for (i=0;i<n;i++) { 4678 gidxs[i] = leaf_data[i]-1; 4679 } 4680 } 4681 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4682 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4683 PetscFunctionReturn(0); 4684 } 4685 4686 #undef __FUNCT__ 4687 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4688 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4689 { 4690 PetscInt i,j; 4691 PetscScalar *alphas; 4692 PetscErrorCode ierr; 4693 4694 PetscFunctionBegin; 4695 /* this implements stabilized Gram-Schmidt */ 4696 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4697 for (i=0;i<n;i++) { 4698 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4699 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4700 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4701 } 4702 ierr = PetscFree(alphas);CHKERRQ(ierr); 4703 PetscFunctionReturn(0); 4704 } 4705 4706 #undef __FUNCT__ 4707 #define __FUNCT__ "MatISGetSubassemblingPattern" 4708 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4709 { 4710 IS ranks_send_to; 4711 PetscInt n_neighs,*neighs,*n_shared,**shared; 4712 PetscMPIInt size,rank,color; 4713 PetscInt *xadj,*adjncy; 4714 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4715 PetscInt i,local_size,threshold=0; 4716 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4717 PetscSubcomm subcomm; 4718 PetscErrorCode ierr; 4719 4720 PetscFunctionBegin; 4721 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4722 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4723 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4724 4725 /* Get info on mapping */ 4726 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4727 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4728 4729 /* build local CSR graph of subdomains' connectivity */ 4730 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4731 xadj[0] = 0; 4732 xadj[1] = PetscMax(n_neighs-1,0); 4733 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4734 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4735 4736 if (threshold) { 4737 PetscInt xadj_count = 0; 4738 for (i=1;i<n_neighs;i++) { 4739 if (n_shared[i] > threshold) { 4740 adjncy[xadj_count] = neighs[i]; 4741 adjncy_wgt[xadj_count] = n_shared[i]; 4742 xadj_count++; 4743 } 4744 } 4745 xadj[1] = xadj_count; 4746 } else { 4747 if (xadj[1]) { 4748 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4749 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4750 } 4751 } 4752 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4753 if (use_square) { 4754 for (i=0;i<xadj[1];i++) { 4755 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4756 } 4757 } 4758 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4759 4760 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4761 4762 /* 4763 Restrict work on active processes only. 4764 */ 4765 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4766 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4767 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4768 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4769 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4770 if (color) { 4771 ierr = PetscFree(xadj);CHKERRQ(ierr); 4772 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4773 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4774 } else { 4775 Mat subdomain_adj; 4776 IS new_ranks,new_ranks_contig; 4777 MatPartitioning partitioner; 4778 PetscInt prank,rstart=0,rend=0; 4779 PetscInt *is_indices,*oldranks; 4780 PetscBool aggregate; 4781 4782 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4783 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4784 prank = rank; 4785 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4786 /* 4787 for (i=0;i<size;i++) { 4788 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4789 } 4790 */ 4791 for (i=0;i<xadj[1];i++) { 4792 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4793 } 4794 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4795 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4796 if (aggregate) { 4797 PetscInt lrows,row,ncols,*cols; 4798 PetscMPIInt nrank; 4799 PetscScalar *vals; 4800 4801 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4802 lrows = 0; 4803 if (nrank<redprocs) { 4804 lrows = size/redprocs; 4805 if (nrank<size%redprocs) lrows++; 4806 } 4807 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4808 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4809 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4810 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4811 row = nrank; 4812 ncols = xadj[1]-xadj[0]; 4813 cols = adjncy; 4814 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4815 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4816 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4817 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4818 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4819 ierr = PetscFree(xadj);CHKERRQ(ierr); 4820 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4821 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4822 ierr = PetscFree(vals);CHKERRQ(ierr); 4823 } else { 4824 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4825 } 4826 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4827 4828 /* Partition */ 4829 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4830 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4831 if (use_vwgt) { 4832 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4833 v_wgt[0] = local_size; 4834 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4835 } 4836 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4837 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4838 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4839 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4840 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4841 4842 /* renumber new_ranks to avoid "holes" in new set of processors */ 4843 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4844 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4845 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4846 if (!redprocs) { 4847 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4848 } else { 4849 PetscInt idxs[1]; 4850 PetscMPIInt tag; 4851 MPI_Request *reqs; 4852 4853 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4854 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4855 for (i=rstart;i<rend;i++) { 4856 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4857 } 4858 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4859 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4860 ierr = PetscFree(reqs);CHKERRQ(ierr); 4861 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4862 } 4863 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4864 /* clean up */ 4865 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4866 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4867 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4868 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4869 } 4870 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4871 4872 /* assemble parallel IS for sends */ 4873 i = 1; 4874 if (color) i=0; 4875 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4876 /* get back IS */ 4877 *is_sends = ranks_send_to; 4878 PetscFunctionReturn(0); 4879 } 4880 4881 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4882 4883 #undef __FUNCT__ 4884 #define __FUNCT__ "MatISSubassemble" 4885 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4886 { 4887 Mat local_mat; 4888 IS is_sends_internal; 4889 PetscInt rows,cols,new_local_rows; 4890 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4891 PetscBool ismatis,isdense,newisdense,destroy_mat; 4892 ISLocalToGlobalMapping l2gmap; 4893 PetscInt* l2gmap_indices; 4894 const PetscInt* is_indices; 4895 MatType new_local_type; 4896 /* buffers */ 4897 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4898 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4899 PetscInt *recv_buffer_idxs_local; 4900 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4901 /* MPI */ 4902 MPI_Comm comm,comm_n; 4903 PetscSubcomm subcomm; 4904 PetscMPIInt n_sends,n_recvs,commsize; 4905 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4906 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4907 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4908 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4909 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4910 PetscErrorCode ierr; 4911 4912 PetscFunctionBegin; 4913 /* TODO: add missing checks */ 4914 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4915 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4916 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4917 PetscValidLogicalCollectiveInt(mat,nis,7); 4918 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4919 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4920 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4921 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4922 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4923 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4924 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4925 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4926 PetscInt mrows,mcols,mnrows,mncols; 4927 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4928 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4929 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4930 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4931 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4932 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4933 } 4934 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4935 PetscValidLogicalCollectiveInt(mat,bs,0); 4936 /* prepare IS for sending if not provided */ 4937 if (!is_sends) { 4938 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4939 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4940 } else { 4941 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4942 is_sends_internal = is_sends; 4943 } 4944 4945 /* get comm */ 4946 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4947 4948 /* compute number of sends */ 4949 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4950 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4951 4952 /* compute number of receives */ 4953 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4954 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4955 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4956 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4957 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4958 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4959 ierr = PetscFree(iflags);CHKERRQ(ierr); 4960 4961 /* restrict comm if requested */ 4962 subcomm = 0; 4963 destroy_mat = PETSC_FALSE; 4964 if (restrict_comm) { 4965 PetscMPIInt color,subcommsize; 4966 4967 color = 0; 4968 if (restrict_full) { 4969 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4970 } else { 4971 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4972 } 4973 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4974 subcommsize = commsize - subcommsize; 4975 /* check if reuse has been requested */ 4976 if (reuse == MAT_REUSE_MATRIX) { 4977 if (*mat_n) { 4978 PetscMPIInt subcommsize2; 4979 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4980 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4981 comm_n = PetscObjectComm((PetscObject)*mat_n); 4982 } else { 4983 comm_n = PETSC_COMM_SELF; 4984 } 4985 } else { /* MAT_INITIAL_MATRIX */ 4986 PetscMPIInt rank; 4987 4988 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4989 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4990 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4991 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4992 comm_n = PetscSubcommChild(subcomm); 4993 } 4994 /* flag to destroy *mat_n if not significative */ 4995 if (color) destroy_mat = PETSC_TRUE; 4996 } else { 4997 comm_n = comm; 4998 } 4999 5000 /* prepare send/receive buffers */ 5001 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5002 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5003 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5004 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5005 if (nis) { 5006 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5007 } 5008 5009 /* Get data from local matrices */ 5010 if (!isdense) { 5011 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5012 /* TODO: See below some guidelines on how to prepare the local buffers */ 5013 /* 5014 send_buffer_vals should contain the raw values of the local matrix 5015 send_buffer_idxs should contain: 5016 - MatType_PRIVATE type 5017 - PetscInt size_of_l2gmap 5018 - PetscInt global_row_indices[size_of_l2gmap] 5019 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5020 */ 5021 } else { 5022 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5023 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5024 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5025 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5026 send_buffer_idxs[1] = i; 5027 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5028 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5029 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5030 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5031 for (i=0;i<n_sends;i++) { 5032 ilengths_vals[is_indices[i]] = len*len; 5033 ilengths_idxs[is_indices[i]] = len+2; 5034 } 5035 } 5036 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5037 /* additional is (if any) */ 5038 if (nis) { 5039 PetscMPIInt psum; 5040 PetscInt j; 5041 for (j=0,psum=0;j<nis;j++) { 5042 PetscInt plen; 5043 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5044 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5045 psum += len+1; /* indices + lenght */ 5046 } 5047 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5048 for (j=0,psum=0;j<nis;j++) { 5049 PetscInt plen; 5050 const PetscInt *is_array_idxs; 5051 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5052 send_buffer_idxs_is[psum] = plen; 5053 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5054 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5055 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5056 psum += plen+1; /* indices + lenght */ 5057 } 5058 for (i=0;i<n_sends;i++) { 5059 ilengths_idxs_is[is_indices[i]] = psum; 5060 } 5061 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5062 } 5063 5064 buf_size_idxs = 0; 5065 buf_size_vals = 0; 5066 buf_size_idxs_is = 0; 5067 for (i=0;i<n_recvs;i++) { 5068 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5069 buf_size_vals += (PetscInt)olengths_vals[i]; 5070 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5071 } 5072 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5073 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5075 5076 /* get new tags for clean communications */ 5077 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5078 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5079 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5080 5081 /* allocate for requests */ 5082 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5083 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5084 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5085 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5086 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5087 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5088 5089 /* communications */ 5090 ptr_idxs = recv_buffer_idxs; 5091 ptr_vals = recv_buffer_vals; 5092 ptr_idxs_is = recv_buffer_idxs_is; 5093 for (i=0;i<n_recvs;i++) { 5094 source_dest = onodes[i]; 5095 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5096 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5097 ptr_idxs += olengths_idxs[i]; 5098 ptr_vals += olengths_vals[i]; 5099 if (nis) { 5100 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5101 ptr_idxs_is += olengths_idxs_is[i]; 5102 } 5103 } 5104 for (i=0;i<n_sends;i++) { 5105 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5106 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5107 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5108 if (nis) { 5109 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5110 } 5111 } 5112 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5113 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5114 5115 /* assemble new l2g map */ 5116 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5117 ptr_idxs = recv_buffer_idxs; 5118 new_local_rows = 0; 5119 for (i=0;i<n_recvs;i++) { 5120 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5121 ptr_idxs += olengths_idxs[i]; 5122 } 5123 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5124 ptr_idxs = recv_buffer_idxs; 5125 new_local_rows = 0; 5126 for (i=0;i<n_recvs;i++) { 5127 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5128 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5129 ptr_idxs += olengths_idxs[i]; 5130 } 5131 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5132 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5133 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5134 5135 /* infer new local matrix type from received local matrices type */ 5136 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5137 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5138 if (n_recvs) { 5139 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5140 ptr_idxs = recv_buffer_idxs; 5141 for (i=0;i<n_recvs;i++) { 5142 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5143 new_local_type_private = MATAIJ_PRIVATE; 5144 break; 5145 } 5146 ptr_idxs += olengths_idxs[i]; 5147 } 5148 switch (new_local_type_private) { 5149 case MATDENSE_PRIVATE: 5150 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5151 new_local_type = MATSEQAIJ; 5152 bs = 1; 5153 } else { /* if I receive only 1 dense matrix */ 5154 new_local_type = MATSEQDENSE; 5155 bs = 1; 5156 } 5157 break; 5158 case MATAIJ_PRIVATE: 5159 new_local_type = MATSEQAIJ; 5160 bs = 1; 5161 break; 5162 case MATBAIJ_PRIVATE: 5163 new_local_type = MATSEQBAIJ; 5164 break; 5165 case MATSBAIJ_PRIVATE: 5166 new_local_type = MATSEQSBAIJ; 5167 break; 5168 default: 5169 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5170 break; 5171 } 5172 } else { /* by default, new_local_type is seqdense */ 5173 new_local_type = MATSEQDENSE; 5174 bs = 1; 5175 } 5176 5177 /* create MATIS object if needed */ 5178 if (reuse == MAT_INITIAL_MATRIX) { 5179 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5180 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5181 } else { 5182 /* it also destroys the local matrices */ 5183 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5184 } 5185 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5186 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5187 5188 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5189 5190 /* Global to local map of received indices */ 5191 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5192 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5193 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5194 5195 /* restore attributes -> type of incoming data and its size */ 5196 buf_size_idxs = 0; 5197 for (i=0;i<n_recvs;i++) { 5198 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5199 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5200 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5201 } 5202 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5203 5204 /* set preallocation */ 5205 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5206 if (!newisdense) { 5207 PetscInt *new_local_nnz=0; 5208 5209 ptr_vals = recv_buffer_vals; 5210 ptr_idxs = recv_buffer_idxs_local; 5211 if (n_recvs) { 5212 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5213 } 5214 for (i=0;i<n_recvs;i++) { 5215 PetscInt j; 5216 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5217 for (j=0;j<*(ptr_idxs+1);j++) { 5218 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5219 } 5220 } else { 5221 /* TODO */ 5222 } 5223 ptr_idxs += olengths_idxs[i]; 5224 } 5225 if (new_local_nnz) { 5226 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5227 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5228 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5229 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5230 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5231 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5232 } else { 5233 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5234 } 5235 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5236 } else { 5237 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5238 } 5239 5240 /* set values */ 5241 ptr_vals = recv_buffer_vals; 5242 ptr_idxs = recv_buffer_idxs_local; 5243 for (i=0;i<n_recvs;i++) { 5244 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5245 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5246 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5247 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5248 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5249 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5250 } else { 5251 /* TODO */ 5252 } 5253 ptr_idxs += olengths_idxs[i]; 5254 ptr_vals += olengths_vals[i]; 5255 } 5256 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5257 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5258 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5259 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5260 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5261 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5262 5263 #if 0 5264 if (!restrict_comm) { /* check */ 5265 Vec lvec,rvec; 5266 PetscReal infty_error; 5267 5268 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5269 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5270 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5271 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5272 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5273 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5274 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5275 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5276 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5277 } 5278 #endif 5279 5280 /* assemble new additional is (if any) */ 5281 if (nis) { 5282 PetscInt **temp_idxs,*count_is,j,psum; 5283 5284 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5285 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5286 ptr_idxs = recv_buffer_idxs_is; 5287 psum = 0; 5288 for (i=0;i<n_recvs;i++) { 5289 for (j=0;j<nis;j++) { 5290 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5291 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5292 psum += plen; 5293 ptr_idxs += plen+1; /* shift pointer to received data */ 5294 } 5295 } 5296 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5297 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5298 for (i=1;i<nis;i++) { 5299 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5300 } 5301 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5302 ptr_idxs = recv_buffer_idxs_is; 5303 for (i=0;i<n_recvs;i++) { 5304 for (j=0;j<nis;j++) { 5305 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5306 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5307 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5308 ptr_idxs += plen+1; /* shift pointer to received data */ 5309 } 5310 } 5311 for (i=0;i<nis;i++) { 5312 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5313 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5314 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5315 } 5316 ierr = PetscFree(count_is);CHKERRQ(ierr); 5317 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5318 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5319 } 5320 /* free workspace */ 5321 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5322 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5323 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5324 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5325 if (isdense) { 5326 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5327 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5328 } else { 5329 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5330 } 5331 if (nis) { 5332 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5333 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5334 } 5335 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5336 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5337 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5338 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5339 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5340 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5341 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5342 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5343 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5344 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5345 ierr = PetscFree(onodes);CHKERRQ(ierr); 5346 if (nis) { 5347 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5348 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5349 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5350 } 5351 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5352 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5353 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5354 for (i=0;i<nis;i++) { 5355 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5356 } 5357 *mat_n = NULL; 5358 } 5359 PetscFunctionReturn(0); 5360 } 5361 5362 /* temporary hack into ksp private data structure */ 5363 #include <petsc/private/kspimpl.h> 5364 5365 #undef __FUNCT__ 5366 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5367 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5368 { 5369 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5370 PC_IS *pcis = (PC_IS*)pc->data; 5371 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5372 MatNullSpace CoarseNullSpace=NULL; 5373 ISLocalToGlobalMapping coarse_islg; 5374 IS coarse_is,*isarray; 5375 PetscInt i,im_active=-1,active_procs=-1; 5376 PetscInt nis,nisdofs,nisneu,nisvert; 5377 PC pc_temp; 5378 PCType coarse_pc_type; 5379 KSPType coarse_ksp_type; 5380 PetscBool multilevel_requested,multilevel_allowed; 5381 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5382 Mat t_coarse_mat_is; 5383 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5384 PetscMPIInt all_procs; 5385 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5386 PetscBool compute_vecs = PETSC_FALSE; 5387 PetscScalar *array; 5388 PetscErrorCode ierr; 5389 5390 PetscFunctionBegin; 5391 /* Assign global numbering to coarse dofs */ 5392 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5393 PetscInt ocoarse_size; 5394 compute_vecs = PETSC_TRUE; 5395 ocoarse_size = pcbddc->coarse_size; 5396 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5397 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5398 /* see if we can avoid some work */ 5399 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5400 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5401 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5402 PC pc; 5403 PetscBool isbddc; 5404 5405 /* temporary workaround since PCBDDC does not have a reset method so far */ 5406 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5407 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5408 if (isbddc) { 5409 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5410 } else { 5411 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5412 } 5413 coarse_reuse = PETSC_FALSE; 5414 } else { /* we can safely reuse already computed coarse matrix */ 5415 coarse_reuse = PETSC_TRUE; 5416 } 5417 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5418 coarse_reuse = PETSC_FALSE; 5419 } 5420 /* reset any subassembling information */ 5421 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5422 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5423 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5424 coarse_reuse = PETSC_TRUE; 5425 } 5426 5427 /* count "active" (i.e. with positive local size) and "void" processes */ 5428 im_active = !!(pcis->n); 5429 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5430 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5431 void_procs = all_procs-active_procs; 5432 csin_type_simple = PETSC_TRUE; 5433 redist = PETSC_FALSE; 5434 if (pcbddc->current_level && void_procs) { 5435 csin_ml = PETSC_TRUE; 5436 ncoarse_ml = void_procs; 5437 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5438 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5439 csin_ds = PETSC_TRUE; 5440 ncoarse_ds = pcbddc->redistribute_coarse; 5441 redist = PETSC_TRUE; 5442 } else { 5443 csin_ds = PETSC_TRUE; 5444 ncoarse_ds = active_procs; 5445 redist = PETSC_TRUE; 5446 } 5447 } else { 5448 csin_ml = PETSC_FALSE; 5449 ncoarse_ml = all_procs; 5450 if (void_procs) { 5451 csin_ds = PETSC_TRUE; 5452 ncoarse_ds = void_procs; 5453 csin_type_simple = PETSC_FALSE; 5454 } else { 5455 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5456 csin_ds = PETSC_TRUE; 5457 ncoarse_ds = pcbddc->redistribute_coarse; 5458 redist = PETSC_TRUE; 5459 } else { 5460 csin_ds = PETSC_FALSE; 5461 ncoarse_ds = all_procs; 5462 } 5463 } 5464 } 5465 5466 /* 5467 test if we can go multilevel: three conditions must be satisfied: 5468 - we have not exceeded the number of levels requested 5469 - we can actually subassemble the active processes 5470 - we can find a suitable number of MPI processes where we can place the subassembled problem 5471 */ 5472 multilevel_allowed = PETSC_FALSE; 5473 multilevel_requested = PETSC_FALSE; 5474 if (pcbddc->current_level < pcbddc->max_levels) { 5475 multilevel_requested = PETSC_TRUE; 5476 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5477 multilevel_allowed = PETSC_FALSE; 5478 } else { 5479 multilevel_allowed = PETSC_TRUE; 5480 } 5481 } 5482 /* determine number of process partecipating to coarse solver */ 5483 if (multilevel_allowed) { 5484 ncoarse = ncoarse_ml; 5485 csin = csin_ml; 5486 redist = PETSC_FALSE; 5487 } else { 5488 ncoarse = ncoarse_ds; 5489 csin = csin_ds; 5490 } 5491 5492 /* creates temporary l2gmap and IS for coarse indexes */ 5493 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5494 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5495 5496 /* creates temporary MATIS object for coarse matrix */ 5497 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5498 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5499 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5500 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5501 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5502 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5503 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5504 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5505 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5506 5507 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5508 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5509 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5510 const PetscInt *idxs; 5511 ISLocalToGlobalMapping tmap; 5512 5513 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5514 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5515 /* allocate space for temporary storage */ 5516 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5517 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5518 /* allocate for IS array */ 5519 nisdofs = pcbddc->n_ISForDofsLocal; 5520 nisneu = !!pcbddc->NeumannBoundariesLocal; 5521 nisvert = 0; 5522 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5523 nisvert = 1; 5524 } 5525 nis = nisdofs + nisneu + nisvert; 5526 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5527 /* dofs splitting */ 5528 for (i=0;i<nisdofs;i++) { 5529 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5530 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5531 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5532 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5533 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5534 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5535 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5536 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5537 } 5538 /* neumann boundaries */ 5539 if (pcbddc->NeumannBoundariesLocal) { 5540 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5541 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5542 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5543 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5544 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5545 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5546 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5547 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5548 } 5549 /* primal vertices (benign) */ 5550 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5551 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5552 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5553 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5554 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5555 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5556 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5557 } 5558 /* free memory */ 5559 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5560 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5561 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5562 } else { 5563 nis = 0; 5564 nisdofs = 0; 5565 nisneu = 0; 5566 nisvert = 0; 5567 isarray = NULL; 5568 } 5569 /* destroy no longer needed map */ 5570 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5571 5572 /* restrict on coarse candidates (if needed) */ 5573 coarse_mat_is = NULL; 5574 if (csin) { 5575 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5576 if (redist) { 5577 PetscMPIInt rank; 5578 PetscInt spc,n_spc_p1,dest[1],destsize; 5579 5580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5581 spc = active_procs/ncoarse; 5582 n_spc_p1 = active_procs%ncoarse; 5583 if (im_active) { 5584 destsize = 1; 5585 if (rank > n_spc_p1*(spc+1)-1) { 5586 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5587 } else { 5588 dest[0] = rank/(spc+1); 5589 } 5590 } else { 5591 destsize = 0; 5592 } 5593 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5594 } else if (csin_type_simple) { 5595 PetscMPIInt rank; 5596 PetscInt issize,isidx; 5597 5598 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5599 if (im_active) { 5600 issize = 1; 5601 isidx = (PetscInt)rank; 5602 } else { 5603 issize = 0; 5604 isidx = -1; 5605 } 5606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5607 } else { /* get a suitable subassembling pattern from MATIS code */ 5608 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5609 } 5610 5611 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5612 if (!redist || ncoarse <= void_procs) { 5613 PetscInt ncoarse_cand,tissize,*nisindices; 5614 PetscInt *coarse_candidates; 5615 const PetscInt* tisindices; 5616 5617 /* get coarse candidates' ranks in pc communicator */ 5618 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5619 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5620 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5621 if (!coarse_candidates[i]) { 5622 coarse_candidates[ncoarse_cand++]=i; 5623 } 5624 } 5625 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5626 5627 5628 if (pcbddc->dbg_flag) { 5629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5630 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5631 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5633 for (i=0;i<ncoarse_cand;i++) { 5634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5635 } 5636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5637 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5638 } 5639 /* shift the pattern on coarse candidates */ 5640 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5641 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5642 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5643 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5644 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5645 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5646 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5647 } 5648 if (pcbddc->dbg_flag) { 5649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5651 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5653 } 5654 } 5655 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5656 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5657 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5658 } else { /* this is the last level, so use just receiving processes in subcomm */ 5659 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5660 } 5661 } else { 5662 if (pcbddc->dbg_flag) { 5663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5664 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5665 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5666 } 5667 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5668 coarse_mat_is = t_coarse_mat_is; 5669 } 5670 5671 /* create local to global scatters for coarse problem */ 5672 if (compute_vecs) { 5673 PetscInt lrows; 5674 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5675 if (coarse_mat_is) { 5676 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5677 } else { 5678 lrows = 0; 5679 } 5680 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5681 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5682 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5683 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5684 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5685 } 5686 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5687 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5688 5689 /* set defaults for coarse KSP and PC */ 5690 if (multilevel_allowed) { 5691 coarse_ksp_type = KSPRICHARDSON; 5692 coarse_pc_type = PCBDDC; 5693 } else { 5694 coarse_ksp_type = KSPPREONLY; 5695 coarse_pc_type = PCREDUNDANT; 5696 } 5697 5698 /* print some info if requested */ 5699 if (pcbddc->dbg_flag) { 5700 if (!multilevel_allowed) { 5701 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5702 if (multilevel_requested) { 5703 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5704 } else if (pcbddc->max_levels) { 5705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5706 } 5707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5708 } 5709 } 5710 5711 /* create the coarse KSP object only once with defaults */ 5712 if (coarse_mat_is) { 5713 MatReuse coarse_mat_reuse; 5714 PetscViewer dbg_viewer = NULL; 5715 if (pcbddc->dbg_flag) { 5716 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5717 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5718 } 5719 if (!pcbddc->coarse_ksp) { 5720 char prefix[256],str_level[16]; 5721 size_t len; 5722 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5723 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5724 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5725 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5726 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5727 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5728 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5729 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5730 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5731 /* prefix */ 5732 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5733 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5734 if (!pcbddc->current_level) { 5735 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5736 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5737 } else { 5738 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5739 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5740 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5741 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5742 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5743 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5744 } 5745 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5746 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5747 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5748 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5749 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5750 /* allow user customization */ 5751 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5752 } 5753 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5754 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5755 if (nisdofs) { 5756 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5757 for (i=0;i<nisdofs;i++) { 5758 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5759 } 5760 } 5761 if (nisneu) { 5762 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5763 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5764 } 5765 if (nisvert) { 5766 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5767 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5768 } 5769 5770 /* get some info after set from options */ 5771 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5772 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5773 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5774 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5775 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5776 isbddc = PETSC_FALSE; 5777 } 5778 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5779 if (isredundant) { 5780 KSP inner_ksp; 5781 PC inner_pc; 5782 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5783 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5784 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5785 } 5786 5787 /* assemble coarse matrix */ 5788 if (coarse_reuse) { 5789 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5790 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5791 coarse_mat_reuse = MAT_REUSE_MATRIX; 5792 } else { 5793 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5794 } 5795 if (isbddc || isnn) { 5796 if (isbddc) { /* currently there are no APIs for these options */ 5797 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5798 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5799 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5800 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5801 if (pcbddc_coarse->benign_compute_nonetflux) { 5802 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5803 } 5804 } 5805 if (pcbddc->coarsening_ratio > 1) { 5806 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5807 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5808 if (pcbddc->dbg_flag) { 5809 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5810 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5811 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5812 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5813 } 5814 } 5815 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5816 } else { 5817 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5818 coarse_mat = coarse_mat_is; 5819 } 5820 } else { 5821 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5822 } 5823 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5824 5825 /* propagate symmetry info of coarse matrix */ 5826 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5827 if (pc->pmat->symmetric_set) { 5828 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5829 } 5830 if (pc->pmat->hermitian_set) { 5831 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5832 } 5833 if (pc->pmat->spd_set) { 5834 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5835 } 5836 /* set operators */ 5837 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5838 if (pcbddc->dbg_flag) { 5839 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5840 } 5841 } else { /* processes non partecipating to coarse solver (if any) */ 5842 coarse_mat = 0; 5843 } 5844 ierr = PetscFree(isarray);CHKERRQ(ierr); 5845 #if 0 5846 { 5847 PetscViewer viewer; 5848 char filename[256]; 5849 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5850 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5851 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5852 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5853 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5854 } 5855 #endif 5856 5857 /* Compute coarse null space (special handling by BDDC only) */ 5858 #if 0 5859 if (pcbddc->NullSpace) { 5860 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5861 } 5862 #endif 5863 /* hack */ 5864 if (pcbddc->coarse_ksp) { 5865 Vec crhs,csol; 5866 5867 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5868 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5869 if (!csol) { 5870 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5871 } 5872 if (!crhs) { 5873 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5874 } 5875 } 5876 5877 /* compute null space for coarse solver if the benign trick has been requested */ 5878 if (pcbddc->benign_null) { 5879 5880 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5881 for (i=0;i<pcbddc->benign_n;i++) { 5882 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5883 } 5884 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5885 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5886 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5887 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5888 if (coarse_mat) { 5889 Vec nullv; 5890 PetscScalar *array,*array2; 5891 PetscInt nl; 5892 5893 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5894 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5895 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5896 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5897 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5898 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5899 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5900 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5901 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5902 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5903 } 5904 } 5905 5906 if (pcbddc->coarse_ksp) { 5907 PetscBool ispreonly; 5908 5909 if (CoarseNullSpace) { 5910 PetscBool isnull; 5911 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5912 if (0) { 5913 if (isbddc && !pcbddc->benign_saddle_point) { 5914 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5915 } else { 5916 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5917 } 5918 } else { 5919 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5920 } 5921 } 5922 /* setup coarse ksp */ 5923 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5924 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5925 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5926 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5927 KSP check_ksp; 5928 KSPType check_ksp_type; 5929 PC check_pc; 5930 Vec check_vec,coarse_vec; 5931 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5932 PetscInt its; 5933 PetscBool compute_eigs; 5934 PetscReal *eigs_r,*eigs_c; 5935 PetscInt neigs; 5936 const char *prefix; 5937 5938 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5939 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5940 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5941 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5942 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5943 if (ispreonly) { 5944 check_ksp_type = KSPPREONLY; 5945 compute_eigs = PETSC_FALSE; 5946 } else { 5947 check_ksp_type = KSPGMRES; 5948 compute_eigs = PETSC_TRUE; 5949 } 5950 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5951 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5952 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5953 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5954 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5955 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5956 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5957 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5958 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5959 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5960 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5961 /* create random vec */ 5962 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5963 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5964 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5965 if (CoarseNullSpace) { 5966 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5967 } 5968 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5969 /* solve coarse problem */ 5970 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5971 if (CoarseNullSpace) { 5972 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5973 } 5974 /* set eigenvalue estimation if preonly has not been requested */ 5975 if (compute_eigs) { 5976 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5977 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5978 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5979 lambda_max = eigs_r[neigs-1]; 5980 lambda_min = eigs_r[0]; 5981 if (pcbddc->use_coarse_estimates) { 5982 if (lambda_max>lambda_min) { 5983 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5984 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5985 } 5986 } 5987 } 5988 5989 /* check coarse problem residual error */ 5990 if (pcbddc->dbg_flag) { 5991 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5992 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5993 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5994 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5995 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5996 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5997 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5998 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5999 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6000 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6001 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6002 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6003 if (CoarseNullSpace) { 6004 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6005 } 6006 if (compute_eigs) { 6007 PetscReal lambda_max_s,lambda_min_s; 6008 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6009 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6010 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6011 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6012 for (i=0;i<neigs;i++) { 6013 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6014 } 6015 } 6016 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6017 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6018 } 6019 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6020 if (compute_eigs) { 6021 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6022 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6023 } 6024 } 6025 } 6026 /* print additional info */ 6027 if (pcbddc->dbg_flag) { 6028 /* waits until all processes reaches this point */ 6029 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6032 } 6033 6034 /* free memory */ 6035 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6036 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6037 PetscFunctionReturn(0); 6038 } 6039 6040 #undef __FUNCT__ 6041 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6042 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6043 { 6044 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6045 PC_IS* pcis = (PC_IS*)pc->data; 6046 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6047 IS subset,subset_mult,subset_n; 6048 PetscInt local_size,coarse_size=0; 6049 PetscInt *local_primal_indices=NULL; 6050 const PetscInt *t_local_primal_indices; 6051 PetscErrorCode ierr; 6052 6053 PetscFunctionBegin; 6054 /* Compute global number of coarse dofs */ 6055 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 6056 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6057 } 6058 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6059 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6060 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6061 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6062 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6063 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6064 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6065 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6066 if (local_size != pcbddc->local_primal_size) { 6067 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 6068 } 6069 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6070 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6071 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6072 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6073 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6074 6075 /* check numbering */ 6076 if (pcbddc->dbg_flag) { 6077 PetscScalar coarsesum,*array,*array2; 6078 PetscInt i; 6079 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6080 6081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6082 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6083 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6084 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6085 /* counter */ 6086 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6087 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6088 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6089 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6090 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6091 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6092 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6093 for (i=0;i<pcbddc->local_primal_size;i++) { 6094 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6095 } 6096 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6097 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6098 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6099 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6100 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6101 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6102 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6103 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6104 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6105 for (i=0;i<pcis->n;i++) { 6106 if (array[i] != 0.0 && array[i] != array2[i]) { 6107 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6108 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6109 set_error = PETSC_TRUE; 6110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6111 } 6112 } 6113 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6114 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6116 for (i=0;i<pcis->n;i++) { 6117 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6118 } 6119 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6120 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6121 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6122 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6123 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6124 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6125 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6126 PetscInt *gidxs; 6127 6128 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6129 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6130 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6132 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6133 for (i=0;i<pcbddc->local_primal_size;i++) { 6134 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6135 } 6136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6137 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6138 } 6139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6140 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6141 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6142 } 6143 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6144 /* get back data */ 6145 *coarse_size_n = coarse_size; 6146 *local_primal_indices_n = local_primal_indices; 6147 PetscFunctionReturn(0); 6148 } 6149 6150 #undef __FUNCT__ 6151 #define __FUNCT__ "PCBDDCGlobalToLocal" 6152 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6153 { 6154 IS localis_t; 6155 PetscInt i,lsize,*idxs,n; 6156 PetscScalar *vals; 6157 PetscErrorCode ierr; 6158 6159 PetscFunctionBegin; 6160 /* get indices in local ordering exploiting local to global map */ 6161 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6162 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6163 for (i=0;i<lsize;i++) vals[i] = 1.0; 6164 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6165 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6166 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6167 if (idxs) { /* multilevel guard */ 6168 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6169 } 6170 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6171 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6172 ierr = PetscFree(vals);CHKERRQ(ierr); 6173 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6174 /* now compute set in local ordering */ 6175 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6176 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6177 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6178 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6179 for (i=0,lsize=0;i<n;i++) { 6180 if (PetscRealPart(vals[i]) > 0.5) { 6181 lsize++; 6182 } 6183 } 6184 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6185 for (i=0,lsize=0;i<n;i++) { 6186 if (PetscRealPart(vals[i]) > 0.5) { 6187 idxs[lsize++] = i; 6188 } 6189 } 6190 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6191 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6192 *localis = localis_t; 6193 PetscFunctionReturn(0); 6194 } 6195 6196 #undef __FUNCT__ 6197 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6198 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6199 { 6200 PC_IS *pcis=(PC_IS*)pc->data; 6201 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6202 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6203 Mat S_j; 6204 PetscInt *used_xadj,*used_adjncy; 6205 PetscBool free_used_adj; 6206 PetscErrorCode ierr; 6207 6208 PetscFunctionBegin; 6209 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6210 free_used_adj = PETSC_FALSE; 6211 if (pcbddc->sub_schurs_layers == -1) { 6212 used_xadj = NULL; 6213 used_adjncy = NULL; 6214 } else { 6215 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6216 used_xadj = pcbddc->mat_graph->xadj; 6217 used_adjncy = pcbddc->mat_graph->adjncy; 6218 } else if (pcbddc->computed_rowadj) { 6219 used_xadj = pcbddc->mat_graph->xadj; 6220 used_adjncy = pcbddc->mat_graph->adjncy; 6221 } else { 6222 PetscBool flg_row=PETSC_FALSE; 6223 const PetscInt *xadj,*adjncy; 6224 PetscInt nvtxs; 6225 6226 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6227 if (flg_row) { 6228 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6229 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6230 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6231 free_used_adj = PETSC_TRUE; 6232 } else { 6233 pcbddc->sub_schurs_layers = -1; 6234 used_xadj = NULL; 6235 used_adjncy = NULL; 6236 } 6237 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6238 } 6239 } 6240 6241 /* setup sub_schurs data */ 6242 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6243 if (!sub_schurs->schur_explicit) { 6244 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6245 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6246 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6247 } else { 6248 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6249 PetscBool isseqaij; 6250 PetscInt benign_n; 6251 Mat change = NULL; 6252 IS change_primal = NULL; 6253 6254 if (!pcbddc->use_vertices && reuse_solvers) { 6255 PetscInt n_vertices; 6256 6257 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6258 reuse_solvers = (PetscBool)!n_vertices; 6259 } 6260 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6261 if (!isseqaij) { 6262 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6263 if (matis->A == pcbddc->local_mat) { 6264 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6265 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6266 } else { 6267 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6268 } 6269 } 6270 if (!pcbddc->benign_change_explicit) { 6271 benign_n = pcbddc->benign_n; 6272 } else { 6273 benign_n = 0; 6274 } 6275 /* If the user defines additional constraints, we import them here. 6276 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6277 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6278 PC_IS *pcisf; 6279 PC_BDDC *pcbddcf; 6280 PC pcf; 6281 6282 if (pcbddc->sub_schurs_rebuild) { 6283 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6284 } 6285 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6286 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6287 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6288 /* hacks */ 6289 pcisf = (PC_IS*)pcf->data; 6290 pcisf->is_B_local = pcis->is_B_local; 6291 pcisf->vec1_N = pcis->vec1_N; 6292 pcisf->BtoNmap = pcis->BtoNmap; 6293 pcisf->n = pcis->n; 6294 pcisf->n_B = pcis->n_B; 6295 pcbddcf = (PC_BDDC*)pcf->data; 6296 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6297 pcbddcf->mat_graph = pcbddc->mat_graph; 6298 pcbddcf->use_faces = PETSC_TRUE; 6299 pcbddcf->use_change_of_basis = PETSC_TRUE; 6300 pcbddcf->use_change_on_faces = PETSC_TRUE; 6301 pcbddcf->use_qr_single = PETSC_TRUE; 6302 pcbddcf->fake_change = PETSC_TRUE; 6303 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6304 /* store information on primal vertices and change of basis (in local numbering) */ 6305 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6306 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6307 change = pcbddcf->ConstraintMatrix; 6308 pcbddcf->ConstraintMatrix = NULL; 6309 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6310 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6311 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6312 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6313 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6314 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6315 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6316 pcf->ops->destroy = NULL; 6317 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6318 } 6319 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6320 ierr = MatDestroy(&change);CHKERRQ(ierr); 6321 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6322 } 6323 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6324 6325 /* free adjacency */ 6326 if (free_used_adj) { 6327 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6328 } 6329 PetscFunctionReturn(0); 6330 } 6331 6332 #undef __FUNCT__ 6333 #define __FUNCT__ "PCBDDCInitSubSchurs" 6334 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6335 { 6336 PC_IS *pcis=(PC_IS*)pc->data; 6337 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6338 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6339 PCBDDCGraph graph; 6340 PetscErrorCode ierr; 6341 6342 PetscFunctionBegin; 6343 /* attach interface graph for determining subsets */ 6344 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6345 IS verticesIS,verticescomm; 6346 PetscInt vsize,*idxs; 6347 6348 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6349 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6350 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6351 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6352 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6353 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6355 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6356 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6357 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6358 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6359 /* 6360 if (pcbddc->dbg_flag) { 6361 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6362 } 6363 */ 6364 } else { 6365 graph = pcbddc->mat_graph; 6366 } 6367 6368 /* sub_schurs init */ 6369 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6370 6371 /* free graph struct */ 6372 if (pcbddc->sub_schurs_rebuild) { 6373 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6374 } 6375 PetscFunctionReturn(0); 6376 } 6377 6378 #undef __FUNCT__ 6379 #define __FUNCT__ "PCBDDCCheckOperator" 6380 PetscErrorCode PCBDDCCheckOperator(PC pc) 6381 { 6382 PC_IS *pcis=(PC_IS*)pc->data; 6383 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6384 PetscErrorCode ierr; 6385 6386 PetscFunctionBegin; 6387 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6388 IS zerodiag = NULL; 6389 Mat S_j,B0_B=NULL; 6390 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6391 PetscScalar *p0_check,*array,*array2; 6392 PetscReal norm; 6393 PetscInt i; 6394 6395 /* B0 and B0_B */ 6396 if (zerodiag) { 6397 IS dummy; 6398 6399 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6400 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6401 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6402 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6403 } 6404 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6405 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6406 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6407 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6408 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6409 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6410 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6411 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6412 /* S_j */ 6413 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6414 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6415 6416 /* mimic vector in \widetilde{W}_\Gamma */ 6417 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6418 /* continuous in primal space */ 6419 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6420 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6421 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6422 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6423 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6424 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6425 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6426 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6429 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6430 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6431 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6432 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6433 6434 /* assemble rhs for coarse problem */ 6435 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6436 /* local with Schur */ 6437 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6438 if (zerodiag) { 6439 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6440 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6441 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6442 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6443 } 6444 /* sum on primal nodes the local contributions */ 6445 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6446 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6447 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6448 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6449 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6450 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6451 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6452 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6453 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6454 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6455 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6456 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6457 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6458 /* scale primal nodes (BDDC sums contibutions) */ 6459 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6460 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6461 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6462 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6463 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6464 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6465 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6466 /* global: \widetilde{B0}_B w_\Gamma */ 6467 if (zerodiag) { 6468 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6469 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6470 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6471 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6472 } 6473 /* BDDC */ 6474 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6475 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6476 6477 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6478 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6479 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6480 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6481 for (i=0;i<pcbddc->benign_n;i++) { 6482 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6483 } 6484 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6485 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6486 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6487 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6488 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6489 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6490 } 6491 PetscFunctionReturn(0); 6492 } 6493