1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 8 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 9 { 10 PCBDDCBenignMatMult_ctx ctx; 11 PetscErrorCode ierr; 12 PetscBool apply_right,apply_left,reset_x; 13 14 PetscFunctionBegin; 15 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 16 if (transpose) { 17 apply_right = ctx->apply_left; 18 apply_left = ctx->apply_right; 19 } else { 20 apply_right = ctx->apply_right; 21 apply_left = ctx->apply_left; 22 } 23 reset_x = PETSC_FALSE; 24 if (apply_right) { 25 const PetscScalar *ax; 26 PetscInt nl,i; 27 28 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 29 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 30 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 31 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 32 for (i=0;i<ctx->benign_n;i++) { 33 PetscScalar sum,val; 34 const PetscInt *idxs; 35 PetscInt nz,j; 36 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 37 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 38 sum = 0.; 39 if (ctx->apply_p0) { 40 val = ctx->work[idxs[nz-1]]; 41 for (j=0;j<nz-1;j++) { 42 sum += ctx->work[idxs[j]]; 43 ctx->work[idxs[j]] += val; 44 } 45 } else { 46 for (j=0;j<nz-1;j++) { 47 sum += ctx->work[idxs[j]]; 48 } 49 } 50 ctx->work[idxs[nz-1]] -= sum; 51 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 54 reset_x = PETSC_TRUE; 55 } 56 if (transpose) { 57 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 58 } else { 59 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 60 } 61 if (reset_x) { 62 ierr = VecResetArray(x);CHKERRQ(ierr); 63 } 64 if (apply_left) { 65 PetscScalar *ay; 66 PetscInt i; 67 68 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 69 for (i=0;i<ctx->benign_n;i++) { 70 PetscScalar sum,val; 71 const PetscInt *idxs; 72 PetscInt nz,j; 73 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 74 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 75 val = -ay[idxs[nz-1]]; 76 if (ctx->apply_p0) { 77 sum = 0.; 78 for (j=0;j<nz-1;j++) { 79 sum += ay[idxs[j]]; 80 ay[idxs[j]] += val; 81 } 82 ay[idxs[nz-1]] += sum; 83 } else { 84 for (j=0;j<nz-1;j++) { 85 ay[idxs[j]] += val; 86 } 87 ay[idxs[nz-1]] = 0.; 88 } 89 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 90 } 91 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 98 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 99 { 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 109 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 110 { 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCBenignShellMat" 120 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 121 { 122 PC_IS *pcis = (PC_IS*)pc->data; 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PCBDDCBenignMatMult_ctx ctx; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!restore) { 129 Mat A_IB,A_BI; 130 PetscScalar *work; 131 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 132 133 if (pcbddc->benign_original_mat) { 134 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 135 } 136 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 137 PetscFunctionReturn(0); 138 } 139 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 140 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 141 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 142 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 143 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 145 ierr = PetscNew(&ctx);CHKERRQ(ierr); 146 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 147 ctx->apply_left = PETSC_TRUE; 148 ctx->apply_right = PETSC_FALSE; 149 ctx->apply_p0 = PETSC_FALSE; 150 ctx->benign_n = pcbddc->benign_n; 151 if (reuse) { 152 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 153 ctx->free = PETSC_FALSE; 154 } else { /* TODO: could be optimized for successive solves */ 155 ISLocalToGlobalMapping N_to_D; 156 PetscInt i; 157 158 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 159 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) { 161 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 162 } 163 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 164 ctx->free = PETSC_TRUE; 165 } 166 ctx->A = pcis->A_IB; 167 ctx->work = work; 168 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 169 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 pcis->A_IB = A_IB; 172 173 /* A_BI as A_IB^T */ 174 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 175 pcbddc->benign_original_mat = pcis->A_BI; 176 pcis->A_BI = A_BI; 177 } else { 178 if (!pcbddc->benign_original_mat) { 179 PetscFunctionReturn(0); 180 } 181 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 182 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 183 pcis->A_IB = ctx->A; 184 ctx->A = NULL; 185 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 186 pcis->A_BI = pcbddc->benign_original_mat; 187 pcbddc->benign_original_mat = NULL; 188 if (ctx->free) { 189 PetscInt i; 190 for (i=0;i<ctx->benign_n;i++) { 191 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 194 } 195 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 196 ierr = PetscFree(ctx);CHKERRQ(ierr); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* used just in bddc debug mode */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "PCBDDCBenignProject" 204 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 205 { 206 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 208 Mat An; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 213 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 214 if (is1) { 215 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 216 ierr = MatDestroy(&An);CHKERRQ(ierr); 217 } else { 218 *B = An; 219 } 220 PetscFunctionReturn(0); 221 } 222 223 /* TODO: add reuse flag */ 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatSeqAIJCompress" 226 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 227 { 228 Mat Bt; 229 PetscScalar *a,*bdata; 230 const PetscInt *ii,*ij; 231 PetscInt m,n,i,nnz,*bii,*bij; 232 PetscBool flg_row; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 237 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 238 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 239 nnz = n; 240 for (i=0;i<ii[n];i++) { 241 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 242 } 243 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 244 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 246 nnz = 0; 247 bii[0] = 0; 248 for (i=0;i<n;i++) { 249 PetscInt j; 250 for (j=ii[i];j<ii[i+1];j++) { 251 PetscScalar entry = a[j]; 252 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 253 bij[nnz] = ij[j]; 254 bdata[nnz] = entry; 255 nnz++; 256 } 257 } 258 bii[i+1] = nnz; 259 } 260 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 261 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 262 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 263 { 264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 265 b->free_a = PETSC_TRUE; 266 b->free_ij = PETSC_TRUE; 267 } 268 *B = Bt; 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDetectDisconnectedComponents" 274 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 275 { 276 Mat B; 277 IS is_dummy,*cc_n; 278 ISLocalToGlobalMapping l2gmap_dummy; 279 PCBDDCGraph graph; 280 PetscInt i,n; 281 PetscInt *xadj,*adjncy; 282 PetscInt *xadj_filtered,*adjncy_filtered; 283 PetscBool flg_row,isseqaij; 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 if (!A->rmap->N || !A->cmap->N) { 288 *ncc = 0; 289 *cc = NULL; 290 PetscFunctionReturn(0); 291 } 292 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 293 if (!isseqaij && filter) { 294 PetscBool isseqdense; 295 296 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 297 if (!isseqdense) { 298 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 299 } else { /* TODO: rectangular case and LDA */ 300 PetscScalar *array; 301 PetscReal chop=1.e-6; 302 303 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 304 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 305 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 306 for (i=0;i<n;i++) { 307 PetscInt j; 308 for (j=i+1;j<n;j++) { 309 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 310 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 311 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 312 } 313 } 314 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 315 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 316 } 317 } else { 318 B = A; 319 } 320 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 321 322 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 323 if (filter) { 324 PetscScalar *data; 325 PetscInt j,cum; 326 327 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 328 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 329 cum = 0; 330 for (i=0;i<n;i++) { 331 PetscInt t; 332 333 for (j=xadj[i];j<xadj[i+1];j++) { 334 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 335 continue; 336 } 337 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 338 } 339 t = xadj_filtered[i]; 340 xadj_filtered[i] = cum; 341 cum += t; 342 } 343 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 344 } else { 345 xadj_filtered = NULL; 346 adjncy_filtered = NULL; 347 } 348 349 /* compute local connected components using PCBDDCGraph */ 350 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 352 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 353 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 354 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 355 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 356 if (xadj_filtered) { 357 graph->xadj = xadj_filtered; 358 graph->adjncy = adjncy_filtered; 359 } else { 360 graph->xadj = xadj; 361 graph->adjncy = adjncy; 362 } 363 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 365 /* partial clean up */ 366 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 367 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 368 if (A != B) { 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 } 371 372 /* get back data */ 373 if (ncc) *ncc = graph->ncc; 374 if (cc) { 375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 376 for (i=0;i<graph->ncc;i++) { 377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 378 } 379 *cc = cc_n; 380 } 381 /* clean up graph */ 382 graph->xadj = 0; 383 graph->adjncy = 0; 384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCBenignCheck" 390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 391 { 392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 393 PC_IS* pcis = (PC_IS*)(pc->data); 394 IS dirIS = NULL; 395 PetscInt i; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 400 if (zerodiag) { 401 Mat A; 402 Vec vec3_N; 403 PetscScalar *vals; 404 const PetscInt *idxs; 405 PetscInt nz,*count; 406 407 /* p0 */ 408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 412 for (i=0;i<nz;i++) vals[i] = 1.; 413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 416 /* v_I */ 417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 418 for (i=0;i<nz;i++) vals[i] = 0.; 419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 425 if (dirIS) { 426 PetscInt n; 427 428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 430 for (i=0;i<n;i++) vals[i] = 0.; 431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 433 } 434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 438 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 441 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 442 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 443 } 444 ierr = PetscFree(vals);CHKERRQ(ierr); 445 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 446 447 /* there should not be any pressure dofs lying on the interface */ 448 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 449 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 450 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 451 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 452 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 453 for (i=0;i<nz;i++) { 454 if (count[idxs[i]]) { 455 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 456 } 457 } 458 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 459 ierr = PetscFree(count);CHKERRQ(ierr); 460 } 461 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 462 463 /* check PCBDDCBenignGetOrSetP0 */ 464 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 465 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 466 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 467 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 468 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 469 for (i=0;i<pcbddc->benign_n;i++) { 470 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 471 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 472 } 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 479 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 480 { 481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 482 IS pressures,zerodiag,*zerodiag_subs; 483 PetscInt nz,n; 484 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 490 for (n=0;n<pcbddc->benign_n;n++) { 491 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 492 } 493 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 494 pcbddc->benign_n = 0; 495 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 496 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 497 Checks if all the pressure dofs in each subdomain have a zero diagonal 498 If not, a change of basis on pressures is not needed 499 since the local Schur complements are SPD 500 */ 501 has_null_pressures = PETSC_TRUE; 502 have_null = PETSC_TRUE; 503 if (pcbddc->n_ISForDofsLocal) { 504 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 505 506 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 507 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 508 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 509 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 510 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 511 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 512 if (!sorted) { 513 ierr = ISSort(pressures);CHKERRQ(ierr); 514 } 515 } else { 516 pressures = NULL; 517 } 518 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 519 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 520 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 521 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 522 if (!sorted) { 523 ierr = ISSort(zerodiag);CHKERRQ(ierr); 524 } 525 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 526 if (!nz) { 527 if (n) have_null = PETSC_FALSE; 528 has_null_pressures = PETSC_FALSE; 529 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 530 } 531 recompute_zerodiag = PETSC_FALSE; 532 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 533 zerodiag_subs = NULL; 534 pcbddc->benign_n = 0; 535 if (has_null_pressures) { 536 IS *subs; 537 PetscInt nsubs,i; 538 539 subs = pcbddc->local_subs; 540 nsubs = pcbddc->n_local_subs; 541 if (nsubs > 1) { 542 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 543 for (i=0;i<nsubs;i++) { 544 ISLocalToGlobalMapping l2g; 545 IS t_zerodiag_subs; 546 PetscInt nl; 547 548 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 549 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 550 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 551 if (nl) { 552 PetscBool valid = PETSC_TRUE; 553 554 if (pressures) { 555 IS t_pressure_subs; 556 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 557 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 558 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 559 } 560 if (valid) { 561 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 562 pcbddc->benign_n++; 563 } else { 564 recompute_zerodiag = PETSC_TRUE; 565 } 566 } 567 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 568 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 569 } 570 } else { /* there's just one subdomain (or zero if they have not been detected */ 571 PetscBool valid = PETSC_TRUE; 572 if (pressures) { 573 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 574 } 575 if (valid) { 576 pcbddc->benign_n = 1; 577 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 578 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 579 zerodiag_subs[0] = zerodiag; 580 } 581 } 582 } 583 584 if (!pcbddc->benign_n) { 585 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 586 recompute_zerodiag = PETSC_FALSE; 587 has_null_pressures = PETSC_FALSE; 588 have_null = PETSC_FALSE; 589 } 590 591 /* final check for null pressures */ 592 if (zerodiag && pressures) { 593 PetscInt nz,np; 594 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 595 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 596 if (nz != np) have_null = PETSC_FALSE; 597 } 598 599 if (recompute_zerodiag) { 600 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 601 if (pcbddc->benign_n == 1) { 602 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 603 zerodiag = zerodiag_subs[0]; 604 } else { 605 PetscInt i,nzn,*new_idxs; 606 607 nzn = 0; 608 for (i=0;i<pcbddc->benign_n;i++) { 609 PetscInt ns; 610 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 611 nzn += ns; 612 } 613 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns,*idxs; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 619 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 620 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 621 nzn += ns; 622 } 623 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 625 } 626 have_null = PETSC_FALSE; 627 } 628 629 /* no-net-flux */ 630 if (pcbddc->benign_compute_nonetflux) { 631 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 632 MatNullSpace near_null_space; 633 ISLocalToGlobalMapping rmap; 634 Vec quad_vec; 635 PetscScalar *pvals; 636 PetscInt i,np,*dummyins; 637 638 /* create vector to hold quadrature weights */ 639 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 640 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 641 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 642 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 643 644 /* compute B^{(i)T} * 1_p */ 645 np = 0; 646 if (zerodiag) { 647 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 648 } else if (pressures) { 649 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 650 } 651 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 652 for (i=0;i<np;i++) pvals[i] = 1.; 653 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 654 if (np) { 655 const PetscInt *pidxs; 656 657 if (zerodiag) { 658 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 659 } else if (pressures) { 660 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 661 } 662 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 663 if (zerodiag) { 664 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 665 } else if (pressures) { 666 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 667 } 668 } 669 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 670 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 672 ierr = PetscFree(pvals);CHKERRQ(ierr); 673 674 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 675 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 676 for (i=0;i<matis->sf_nroots;i++) matis->sf_rootdata[i] = -1; 677 for (i=0;i<matis->sf_nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 678 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 679 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 680 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 681 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 682 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 683 for (i=0;i<matis->sf_nleaves;i++) { 684 if (PetscGlobalRank != matis->sf_leafdata[i]) { 685 pvals[i] = 0.; 686 } 687 } 688 ierr = PetscMalloc1(matis->sf_nleaves,&dummyins);CHKERRQ(ierr); 689 for (i=0;i<matis->sf_nleaves;i++) dummyins[i] = i; 690 ierr = VecSetValuesLocal(quad_vec,matis->sf_nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 691 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 692 ierr = PetscFree(dummyins);CHKERRQ(ierr); 693 694 /* assembly quadrature vec and attach near null space to pmat */ 695 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 696 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 697 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 698 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 699 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 700 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 701 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 702 } 703 704 /* change of basis and p0 dofs */ 705 if (has_null_pressures) { 706 IS zerodiagc; 707 const PetscInt *idxs,*idxsc; 708 PetscInt i,s,*nnz; 709 710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 713 /* local change of basis for pressures */ 714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 720 for (i=0;i<pcbddc->benign_n;i++) { 721 PetscInt nzs,j; 722 723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 728 } 729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 730 ierr = PetscFree(nnz);CHKERRQ(ierr); 731 /* set identity on velocities */ 732 for (i=0;i<n-nz;i++) { 733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 734 } 735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 739 /* set change on pressures */ 740 for (s=0;s<pcbddc->benign_n;s++) { 741 PetscScalar *array; 742 PetscInt nzs; 743 744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 746 for (i=0;i<nzs-1;i++) { 747 PetscScalar vals[2]; 748 PetscInt cols[2]; 749 750 cols[0] = idxs[i]; 751 cols[1] = idxs[nzs-1]; 752 vals[0] = 1.; 753 vals[1] = 1.; 754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 } 756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 757 for (i=0;i<nzs-1;i++) array[i] = -1.; 758 array[nzs-1] = 1.; 759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 760 /* store local idxs for p0 */ 761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 763 ierr = PetscFree(array);CHKERRQ(ierr); 764 } 765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* project if needed */ 768 if (pcbddc->benign_change_explicit) { 769 Mat M; 770 771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 774 ierr = MatDestroy(&M);CHKERRQ(ierr); 775 } 776 /* store global idxs for p0 */ 777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 778 } 779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 781 782 /* determines if the coarse solver will be singular or not */ 783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 784 *zerodiaglocal = zerodiag; 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 791 { 792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 if (!pcbddc->benign_sf) { 797 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 798 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 799 } 800 if (get) { /* use SF to get values */ 801 PetscScalar *array; 802 803 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 804 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 805 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 806 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 807 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 808 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 809 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 810 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 817 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 818 { 819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 /* TODO: add error checking 824 - avoid nested pop (or push) calls. 825 - cannot push before pop. 826 - cannot call this if pcbddc->local_mat is NULL 827 */ 828 if (!pcbddc->benign_n) { 829 PetscFunctionReturn(0); 830 } 831 if (pop) { 832 if (pcbddc->benign_change_explicit) { 833 IS is_p0; 834 MatReuse reuse; 835 836 /* extract B_0 */ 837 reuse = MAT_INITIAL_MATRIX; 838 if (pcbddc->benign_B0) { 839 reuse = MAT_REUSE_MATRIX; 840 } 841 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 843 /* remove rows and cols from local problem */ 844 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 845 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 846 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 848 } else { 849 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 850 PetscScalar *vals; 851 PetscInt i,n,*idxs_ins; 852 853 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 854 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 855 if (!pcbddc->benign_B0) { 856 PetscInt *nnz; 857 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 858 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 859 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 860 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 861 for (i=0;i<pcbddc->benign_n;i++) { 862 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 863 nnz[i] = n - nnz[i]; 864 } 865 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 866 ierr = PetscFree(nnz);CHKERRQ(ierr); 867 } 868 869 for (i=0;i<pcbddc->benign_n;i++) { 870 PetscScalar *array; 871 PetscInt *idxs,j,nz,cum; 872 873 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 875 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 876 for (j=0;j<nz;j++) vals[j] = 1.; 877 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 878 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 879 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 880 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 881 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 883 cum = 0; 884 for (j=0;j<n;j++) { 885 if (PetscUnlikely(PetscAbsReal(array[j]) > PETSC_SMALL)) { 886 vals[cum] = array[j]; 887 idxs_ins[cum] = j; 888 cum++; 889 } 890 } 891 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 892 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 898 } 899 } else { /* push */ 900 if (pcbddc->benign_change_explicit) { 901 PetscInt i; 902 903 for (i=0;i<pcbddc->benign_n;i++) { 904 PetscScalar *B0_vals; 905 PetscInt *B0_cols,B0_ncol; 906 907 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 908 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 909 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 910 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 911 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 912 } 913 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 } else { 916 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "PCBDDCAdaptiveSelection" 924 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 925 { 926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 927 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 928 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 929 PetscBLASInt *B_iwork,*B_ifail; 930 PetscScalar *work,lwork; 931 PetscScalar *St,*S,*eigv; 932 PetscScalar *Sarray,*Starray; 933 PetscReal *eigs,thresh; 934 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 935 PetscBool allocated_S_St; 936 #if defined(PETSC_USE_COMPLEX) 937 PetscReal *rwork; 938 #endif 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 943 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 944 945 if (pcbddc->dbg_flag) { 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 949 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 } 951 952 if (pcbddc->dbg_flag) { 953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 954 } 955 956 /* max size of subsets */ 957 mss = 0; 958 for (i=0;i<sub_schurs->n_subs;i++) { 959 PetscInt subset_size; 960 961 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 962 mss = PetscMax(mss,subset_size); 963 } 964 965 /* min/max and threshold */ 966 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 967 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 968 nmax = PetscMax(nmin,nmax); 969 allocated_S_St = PETSC_FALSE; 970 if (nmin) { 971 allocated_S_St = PETSC_TRUE; 972 } 973 974 /* allocate lapack workspace */ 975 cum = cum2 = 0; 976 maxneigs = 0; 977 for (i=0;i<sub_schurs->n_subs;i++) { 978 PetscInt n,subset_size; 979 980 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 981 n = PetscMin(subset_size,nmax); 982 cum += subset_size; 983 cum2 += subset_size*n; 984 maxneigs = PetscMax(maxneigs,n); 985 } 986 if (mss) { 987 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 988 PetscBLASInt B_itype = 1; 989 PetscBLASInt B_N = mss; 990 PetscReal zero = 0.0; 991 PetscReal eps = 0.0; /* dlamch? */ 992 993 B_lwork = -1; 994 S = NULL; 995 St = NULL; 996 eigs = NULL; 997 eigv = NULL; 998 B_iwork = NULL; 999 B_ifail = NULL; 1000 #if defined(PETSC_USE_COMPLEX) 1001 rwork = NULL; 1002 #endif 1003 thresh = 1.0; 1004 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1005 #if defined(PETSC_USE_COMPLEX) 1006 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1007 #else 1008 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1009 #endif 1010 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1012 } else { 1013 /* TODO */ 1014 } 1015 } else { 1016 lwork = 0; 1017 } 1018 1019 nv = 0; 1020 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1021 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1022 } 1023 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1024 if (allocated_S_St) { 1025 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1026 } 1027 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1028 #if defined(PETSC_USE_COMPLEX) 1029 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1030 #endif 1031 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1032 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1033 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1034 nv+cum,&pcbddc->adaptive_constraints_idxs, 1035 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1036 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1037 1038 maxneigs = 0; 1039 cum = cumarray = 0; 1040 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1041 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1042 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1043 const PetscInt *idxs; 1044 1045 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1046 for (cum=0;cum<nv;cum++) { 1047 pcbddc->adaptive_constraints_n[cum] = 1; 1048 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1049 pcbddc->adaptive_constraints_data[cum] = 1.0; 1050 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1051 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1052 } 1053 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1054 } 1055 1056 if (mss) { /* multilevel */ 1057 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1058 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1059 } 1060 1061 thresh = pcbddc->adaptive_threshold; 1062 for (i=0;i<sub_schurs->n_subs;i++) { 1063 const PetscInt *idxs; 1064 PetscReal infty = PETSC_MAX_REAL; 1065 PetscInt j,subset_size,eigs_start = 0; 1066 PetscBLASInt B_N; 1067 PetscBool same_data = PETSC_FALSE; 1068 1069 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1070 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1071 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1072 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1073 if (sub_schurs->is_hermitian) { 1074 PetscInt j,k; 1075 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1076 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1077 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1078 } 1079 for (j=0;j<subset_size;j++) { 1080 for (k=j;k<subset_size;k++) { 1081 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1082 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1083 } 1084 } 1085 } else { 1086 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1087 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1088 } 1089 } else { 1090 S = Sarray + cumarray; 1091 St = Starray + cumarray; 1092 } 1093 /* see if we can save some work */ 1094 if (sub_schurs->n_subs == 1) { 1095 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1096 } 1097 1098 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 1099 B_neigs = 0; 1100 } else { 1101 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1102 PetscBLASInt B_itype = 1; 1103 PetscBLASInt B_IL, B_IU; 1104 PetscReal eps = -1.0; /* dlamch? */ 1105 PetscInt nmin_s; 1106 1107 if (pcbddc->dbg_flag) { 1108 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1109 } 1110 1111 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1112 if (thresh > 1.+PETSC_SMALL) { 1113 1114 /* ask for eigenvalues larger than thresh */ 1115 #if defined(PETSC_USE_COMPLEX) 1116 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1117 #else 1118 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1119 #endif 1120 } else { 1121 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1122 B_IL = 1; 1123 #if defined(PETSC_USE_COMPLEX) 1124 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1125 #else 1126 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1127 #endif 1128 } 1129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1130 if (B_ierr) { 1131 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1132 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1133 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1134 } 1135 1136 if (B_neigs > nmax) { 1137 if (pcbddc->dbg_flag) { 1138 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1139 } 1140 eigs_start = B_neigs -nmax; 1141 B_neigs = nmax; 1142 } 1143 1144 nmin_s = PetscMin(nmin,B_N); 1145 if (B_neigs < nmin_s) { 1146 PetscBLASInt B_neigs2; 1147 1148 B_IU = B_N - B_neigs; 1149 B_IL = B_N - nmin_s + 1; 1150 if (pcbddc->dbg_flag) { 1151 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1152 } 1153 if (sub_schurs->is_hermitian) { 1154 PetscInt j; 1155 for (j=0;j<subset_size;j++) { 1156 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1157 } 1158 for (j=0;j<subset_size;j++) { 1159 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1160 } 1161 } else { 1162 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1163 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1164 } 1165 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1166 #if defined(PETSC_USE_COMPLEX) 1167 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1168 #else 1169 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1170 #endif 1171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1172 B_neigs += B_neigs2; 1173 } 1174 if (B_ierr) { 1175 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1176 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1177 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1178 } 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1181 for (j=0;j<B_neigs;j++) { 1182 if (eigs[j] == 0.0) { 1183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1184 } else { 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1186 } 1187 } 1188 } 1189 } else { 1190 /* TODO */ 1191 } 1192 } 1193 /* change the basis back to the original one */ 1194 if (sub_schurs->change) { 1195 Mat change,phi,phit; 1196 1197 if (pcbddc->dbg_flag > 1) { 1198 PetscInt ii; 1199 for (ii=0;ii<B_neigs;ii++) { 1200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1201 for (j=0;j<B_N;j++) { 1202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1203 } 1204 } 1205 } 1206 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1207 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1208 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1209 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1210 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1211 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1212 } 1213 maxneigs = PetscMax(B_neigs,maxneigs); 1214 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1215 if (B_neigs) { 1216 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1217 1218 if (pcbddc->dbg_flag > 1) { 1219 PetscInt ii; 1220 for (ii=0;ii<B_neigs;ii++) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1222 for (j=0;j<B_N;j++) { 1223 #if defined(PETSC_USE_COMPLEX) 1224 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1225 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1227 #else 1228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1229 #endif 1230 } 1231 } 1232 } 1233 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1234 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1235 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1236 cum++; 1237 } 1238 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1239 /* shift for next computation */ 1240 cumarray += subset_size*subset_size; 1241 } 1242 if (pcbddc->dbg_flag) { 1243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1244 } 1245 1246 if (mss) { 1247 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1248 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1249 /* destroy matrices (junk) */ 1250 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1251 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1252 } 1253 if (allocated_S_St) { 1254 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1255 } 1256 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1257 #if defined(PETSC_USE_COMPLEX) 1258 ierr = PetscFree(rwork);CHKERRQ(ierr); 1259 #endif 1260 if (pcbddc->dbg_flag) { 1261 PetscInt maxneigs_r; 1262 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "PCBDDCSetUpSolvers" 1270 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1271 { 1272 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1273 PetscScalar *coarse_submat_vals; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1278 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1279 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1280 1281 /* Setup local neumann solver ksp_R */ 1282 /* PCBDDCSetUpLocalScatters should be called first! */ 1283 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1284 1285 /* Change global null space passed in by the user if change of basis has been requested */ 1286 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1287 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1288 } 1289 1290 /* 1291 Setup local correction and local part of coarse basis. 1292 Gives back the dense local part of the coarse matrix in column major ordering 1293 */ 1294 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1295 1296 /* Compute total number of coarse nodes and setup coarse solver */ 1297 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1298 1299 /* free */ 1300 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "PCBDDCResetCustomization" 1306 PetscErrorCode PCBDDCResetCustomization(PC pc) 1307 { 1308 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1313 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1314 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1315 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1316 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1317 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1318 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1319 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1320 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1321 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1322 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1323 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCResetTopography" 1329 PetscErrorCode PCBDDCResetTopography(PC pc) 1330 { 1331 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1332 PetscInt i; 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1338 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1339 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1341 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1342 for (i=0;i<pcbddc->n_local_subs;i++) { 1343 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1344 } 1345 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1346 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "PCBDDCResetSolvers" 1352 PetscErrorCode PCBDDCResetSolvers(PC pc) 1353 { 1354 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1355 PetscErrorCode ierr; 1356 1357 PetscFunctionBegin; 1358 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1359 if (pcbddc->coarse_phi_B) { 1360 PetscScalar *array; 1361 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1362 ierr = PetscFree(array);CHKERRQ(ierr); 1363 } 1364 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1365 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1366 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1367 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1368 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1369 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1370 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1371 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1372 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1373 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1374 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1375 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1376 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1377 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1378 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1379 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1380 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1381 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1382 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1383 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1384 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1385 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1386 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1387 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1388 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1389 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1390 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1391 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1392 if (pcbddc->benign_zerodiag_subs) { 1393 PetscInt i; 1394 for (i=0;i<pcbddc->benign_n;i++) { 1395 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1396 } 1397 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1398 } 1399 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1405 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1406 { 1407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1408 PC_IS *pcis = (PC_IS*)pc->data; 1409 VecType impVecType; 1410 PetscInt n_constraints,n_R,old_size; 1411 PetscErrorCode ierr; 1412 1413 PetscFunctionBegin; 1414 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1415 /* get sizes */ 1416 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1417 n_R = pcis->n - pcbddc->n_vertices; 1418 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1419 /* local work vectors (try to avoid unneeded work)*/ 1420 /* R nodes */ 1421 old_size = -1; 1422 if (pcbddc->vec1_R) { 1423 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1424 } 1425 if (n_R != old_size) { 1426 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1427 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1428 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1429 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1430 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1431 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1432 } 1433 /* local primal dofs */ 1434 old_size = -1; 1435 if (pcbddc->vec1_P) { 1436 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1437 } 1438 if (pcbddc->local_primal_size != old_size) { 1439 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1440 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1441 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1442 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1443 } 1444 /* local explicit constraints */ 1445 old_size = -1; 1446 if (pcbddc->vec1_C) { 1447 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1448 } 1449 if (n_constraints && n_constraints != old_size) { 1450 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1451 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1452 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1453 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "PCBDDCSetUpCorrection" 1460 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1461 { 1462 PetscErrorCode ierr; 1463 /* pointers to pcis and pcbddc */ 1464 PC_IS* pcis = (PC_IS*)pc->data; 1465 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1466 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1467 /* submatrices of local problem */ 1468 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1469 /* submatrices of local coarse problem */ 1470 Mat S_VV,S_CV,S_VC,S_CC; 1471 /* working matrices */ 1472 Mat C_CR; 1473 /* additional working stuff */ 1474 PC pc_R; 1475 Mat F; 1476 Vec dummy_vec; 1477 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1478 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1479 PetscScalar *work; 1480 PetscInt *idx_V_B; 1481 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1482 PetscInt i,n_R,n_D,n_B; 1483 1484 /* some shortcuts to scalars */ 1485 PetscScalar one=1.0,m_one=-1.0; 1486 1487 PetscFunctionBegin; 1488 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1489 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1490 } 1491 1492 /* Set Non-overlapping dimensions */ 1493 n_vertices = pcbddc->n_vertices; 1494 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1495 n_B = pcis->n_B; 1496 n_D = pcis->n - n_B; 1497 n_R = pcis->n - n_vertices; 1498 1499 /* vertices in boundary numbering */ 1500 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1501 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1502 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1503 1504 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1505 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1506 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1507 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1508 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1509 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1510 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1511 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1512 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1513 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1514 1515 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1516 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1517 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1520 lda_rhs = n_R; 1521 need_benign_correction = PETSC_FALSE; 1522 if (isLU || isILU || isCHOL) { 1523 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1524 } else if (sub_schurs->reuse_solver) { 1525 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1526 MatFactorType type; 1527 1528 F = reuse_solver->F; 1529 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1530 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1531 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1532 need_benign_correction = !!reuse_solver->benign_n; 1533 } else { 1534 F = NULL; 1535 } 1536 1537 /* allocate workspace */ 1538 n = 0; 1539 if (n_constraints) { 1540 n += lda_rhs*n_constraints; 1541 } 1542 if (n_vertices) { 1543 n = PetscMax(2*lda_rhs*n_vertices,n); 1544 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1545 } 1546 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1547 1548 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1549 dummy_vec = NULL; 1550 if (need_benign_correction && lda_rhs != n_R && F) { 1551 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1552 } 1553 1554 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1555 if (n_constraints) { 1556 Mat M1,M2,M3,C_B; 1557 IS is_aux; 1558 PetscScalar *array,*array2; 1559 1560 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1561 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1562 1563 /* Extract constraints on R nodes: C_{CR} */ 1564 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1565 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1566 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1567 1568 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1569 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1570 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1571 for (i=0;i<n_constraints;i++) { 1572 const PetscScalar *row_cmat_values; 1573 const PetscInt *row_cmat_indices; 1574 PetscInt size_of_constraint,j; 1575 1576 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1577 for (j=0;j<size_of_constraint;j++) { 1578 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1579 } 1580 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1581 } 1582 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1583 if (F) { 1584 Mat B; 1585 1586 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1587 if (need_benign_correction) { 1588 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1589 1590 /* rhs is already zero on interior dofs, no need to change the rhs */ 1591 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1592 } 1593 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1594 if (need_benign_correction) { 1595 PetscScalar *marr; 1596 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1597 1598 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1599 if (lda_rhs != n_R) { 1600 for (i=0;i<n_constraints;i++) { 1601 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1602 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1603 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1604 } 1605 } else { 1606 for (i=0;i<n_constraints;i++) { 1607 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1608 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1609 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1613 } 1614 ierr = MatDestroy(&B);CHKERRQ(ierr); 1615 } else { 1616 PetscScalar *marr; 1617 1618 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1619 for (i=0;i<n_constraints;i++) { 1620 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1621 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1622 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1623 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1625 } 1626 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1627 } 1628 if (!pcbddc->switch_static) { 1629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1630 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1631 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1632 for (i=0;i<n_constraints;i++) { 1633 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1634 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1635 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1638 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1639 } 1640 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1641 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1642 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1643 } else { 1644 if (lda_rhs != n_R) { 1645 IS dummy; 1646 1647 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1648 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1649 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1650 } else { 1651 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1652 pcbddc->local_auxmat2 = local_auxmat2_R; 1653 } 1654 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1655 } 1656 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1657 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1658 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1659 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1660 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1661 if (isCHOL) { 1662 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1663 } else { 1664 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1665 } 1666 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1667 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1668 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1669 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1670 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1671 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1672 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1673 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1674 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1675 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1676 } 1677 1678 /* Get submatrices from subdomain matrix */ 1679 if (n_vertices) { 1680 IS is_aux; 1681 1682 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1683 IS tis; 1684 1685 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1686 ierr = ISSort(tis);CHKERRQ(ierr); 1687 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1688 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1689 } else { 1690 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1691 } 1692 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1693 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1694 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1695 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1696 } 1697 1698 /* Matrix of coarse basis functions (local) */ 1699 if (pcbddc->coarse_phi_B) { 1700 PetscInt on_B,on_primal,on_D=n_D; 1701 if (pcbddc->coarse_phi_D) { 1702 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1703 } 1704 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1705 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1706 PetscScalar *marray; 1707 1708 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1709 ierr = PetscFree(marray);CHKERRQ(ierr); 1710 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1711 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1712 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1713 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1714 } 1715 } 1716 1717 if (!pcbddc->coarse_phi_B) { 1718 PetscScalar *marray; 1719 1720 n = n_B*pcbddc->local_primal_size; 1721 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1722 n += n_D*pcbddc->local_primal_size; 1723 } 1724 if (!pcbddc->symmetric_primal) { 1725 n *= 2; 1726 } 1727 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1728 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1729 n = n_B*pcbddc->local_primal_size; 1730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1731 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1732 n += n_D*pcbddc->local_primal_size; 1733 } 1734 if (!pcbddc->symmetric_primal) { 1735 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1737 n = n_B*pcbddc->local_primal_size; 1738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1739 } 1740 } else { 1741 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1742 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1743 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1744 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1745 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1746 } 1747 } 1748 } 1749 1750 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1751 p0_lidx_I = NULL; 1752 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1753 const PetscInt *idxs; 1754 1755 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1756 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1757 for (i=0;i<pcbddc->benign_n;i++) { 1758 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1759 } 1760 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1761 } 1762 1763 /* vertices */ 1764 if (n_vertices) { 1765 1766 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1767 1768 if (n_R) { 1769 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1770 PetscBLASInt B_N,B_one = 1; 1771 PetscScalar *x,*y; 1772 PetscBool isseqaij; 1773 1774 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1775 if (need_benign_correction) { 1776 ISLocalToGlobalMapping RtoN; 1777 IS is_p0; 1778 PetscInt *idxs_p0,n; 1779 1780 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1782 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1783 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1784 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1785 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1786 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1787 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1788 } 1789 1790 if (lda_rhs == n_R) { 1791 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1792 } else { 1793 PetscScalar *av,*array; 1794 const PetscInt *xadj,*adjncy; 1795 PetscInt n; 1796 PetscBool flg_row; 1797 1798 array = work+lda_rhs*n_vertices; 1799 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1800 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1801 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1802 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1803 for (i=0;i<n;i++) { 1804 PetscInt j; 1805 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1806 } 1807 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1808 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1810 } 1811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1812 if (need_benign_correction) { 1813 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1814 PetscScalar *marr; 1815 1816 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1817 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1818 1819 | 0 0 0 | (V) 1820 L = | 0 0 -1 | (P-p0) 1821 | 0 0 -1 | (p0) 1822 1823 */ 1824 for (i=0;i<reuse_solver->benign_n;i++) { 1825 const PetscScalar *vals; 1826 const PetscInt *idxs,*idxs_zero; 1827 PetscInt n,j,nz; 1828 1829 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1830 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1831 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1832 for (j=0;j<n;j++) { 1833 PetscScalar val = vals[j]; 1834 PetscInt k,col = idxs[j]; 1835 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1836 } 1837 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1838 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1839 } 1840 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1841 } 1842 if (F) { 1843 /* need to correct the rhs */ 1844 if (need_benign_correction) { 1845 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1846 PetscScalar *marr; 1847 1848 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1849 if (lda_rhs != n_R) { 1850 for (i=0;i<n_vertices;i++) { 1851 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1852 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1853 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1854 } 1855 } else { 1856 for (i=0;i<n_vertices;i++) { 1857 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1858 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1859 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1860 } 1861 } 1862 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1863 } 1864 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1865 /* need to correct the solution */ 1866 if (need_benign_correction) { 1867 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1868 PetscScalar *marr; 1869 1870 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1871 if (lda_rhs != n_R) { 1872 for (i=0;i<n_vertices;i++) { 1873 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1874 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1875 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1876 } 1877 } else { 1878 for (i=0;i<n_vertices;i++) { 1879 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1880 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1881 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1882 } 1883 } 1884 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1885 } 1886 } else { 1887 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1888 for (i=0;i<n_vertices;i++) { 1889 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1890 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1891 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1893 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1894 } 1895 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1896 } 1897 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1898 /* S_VV and S_CV */ 1899 if (n_constraints) { 1900 Mat B; 1901 1902 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1903 for (i=0;i<n_vertices;i++) { 1904 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1905 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1906 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1907 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1908 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1909 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1910 } 1911 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1912 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1913 ierr = MatDestroy(&B);CHKERRQ(ierr); 1914 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1915 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1916 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1917 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1918 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1919 ierr = MatDestroy(&B);CHKERRQ(ierr); 1920 } 1921 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1922 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1923 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1924 } 1925 if (lda_rhs != n_R) { 1926 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1927 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1928 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1929 } 1930 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1931 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1932 if (need_benign_correction) { 1933 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1934 PetscScalar *marr,*sums; 1935 1936 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1937 ierr = MatDenseGetArray(S_VVt,&marr); 1938 for (i=0;i<reuse_solver->benign_n;i++) { 1939 const PetscScalar *vals; 1940 const PetscInt *idxs,*idxs_zero; 1941 PetscInt n,j,nz; 1942 1943 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1945 for (j=0;j<n_vertices;j++) { 1946 PetscInt k; 1947 sums[j] = 0.; 1948 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1949 } 1950 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1951 for (j=0;j<n;j++) { 1952 PetscScalar val = vals[j]; 1953 PetscInt k; 1954 for (k=0;k<n_vertices;k++) { 1955 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1956 } 1957 } 1958 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1959 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1960 } 1961 ierr = PetscFree(sums);CHKERRQ(ierr); 1962 ierr = MatDenseRestoreArray(S_VVt,&marr); 1963 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 1964 } 1965 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1967 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1968 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1969 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1970 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1971 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1972 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1973 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1974 } else { 1975 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1976 } 1977 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1978 1979 /* coarse basis functions */ 1980 for (i=0;i<n_vertices;i++) { 1981 PetscScalar *y; 1982 1983 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1984 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1988 y[n_B*i+idx_V_B[i]] = 1.0; 1989 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1990 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1991 1992 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1993 PetscInt j; 1994 1995 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2000 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2001 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2002 } 2003 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2004 } 2005 /* if n_R == 0 the object is not destroyed */ 2006 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2007 } 2008 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2009 2010 if (n_constraints) { 2011 Mat B; 2012 2013 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2014 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2015 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2016 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2017 if (n_vertices) { 2018 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2019 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2020 } else { 2021 Mat S_VCt; 2022 2023 if (lda_rhs != n_R) { 2024 ierr = MatDestroy(&B);CHKERRQ(ierr); 2025 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2026 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2027 } 2028 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2029 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2030 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2031 } 2032 } 2033 ierr = MatDestroy(&B);CHKERRQ(ierr); 2034 /* coarse basis functions */ 2035 for (i=0;i<n_constraints;i++) { 2036 PetscScalar *y; 2037 2038 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2039 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2040 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2041 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2042 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2043 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2044 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2045 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2046 PetscInt j; 2047 2048 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2049 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2050 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2051 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2052 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2053 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2054 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2055 } 2056 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2057 } 2058 } 2059 if (n_constraints) { 2060 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2061 } 2062 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2063 2064 /* coarse matrix entries relative to B_0 */ 2065 if (pcbddc->benign_n) { 2066 Mat B0_B,B0_BPHI; 2067 IS is_dummy; 2068 PetscScalar *data; 2069 PetscInt j; 2070 2071 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2072 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2073 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2074 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2075 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2076 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2077 for (j=0;j<pcbddc->benign_n;j++) { 2078 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2079 for (i=0;i<pcbddc->local_primal_size;i++) { 2080 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2081 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2082 } 2083 } 2084 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2085 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2086 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2087 } 2088 2089 /* compute other basis functions for non-symmetric problems */ 2090 if (!pcbddc->symmetric_primal) { 2091 Mat B_V=NULL,B_C=NULL; 2092 PetscScalar *marray; 2093 2094 if (n_constraints) { 2095 Mat S_CCT,C_CRT; 2096 2097 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2098 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2099 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2100 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2101 if (n_vertices) { 2102 Mat S_VCT; 2103 2104 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2105 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2106 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2107 } 2108 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2109 } 2110 if (n_vertices && n_R) { 2111 PetscScalar *av,*marray; 2112 const PetscInt *xadj,*adjncy; 2113 PetscInt n; 2114 PetscBool flg_row; 2115 2116 /* B_V = B_V - A_VR^T */ 2117 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2118 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2119 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2120 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2121 for (i=0;i<n;i++) { 2122 PetscInt j; 2123 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2124 } 2125 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2126 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2127 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2128 } 2129 2130 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2131 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2132 for (i=0;i<n_vertices;i++) { 2133 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2134 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2135 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2136 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2137 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2138 } 2139 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2140 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2141 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2142 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2143 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2144 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2145 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2146 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2147 } 2148 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2149 /* coarse basis functions */ 2150 for (i=0;i<pcbddc->local_primal_size;i++) { 2151 PetscScalar *y; 2152 2153 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2154 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2155 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2156 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2157 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2158 if (i<n_vertices) { 2159 y[n_B*i+idx_V_B[i]] = 1.0; 2160 } 2161 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2162 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2163 2164 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2165 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2166 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2167 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2169 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2170 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2171 } 2172 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2173 } 2174 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2175 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2176 } 2177 /* free memory */ 2178 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2179 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2180 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2181 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2182 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2183 ierr = PetscFree(work);CHKERRQ(ierr); 2184 if (n_vertices) { 2185 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2186 } 2187 if (n_constraints) { 2188 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2189 } 2190 /* Checking coarse_sub_mat and coarse basis functios */ 2191 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2192 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2193 if (pcbddc->dbg_flag) { 2194 Mat coarse_sub_mat; 2195 Mat AUXMAT,TM1,TM2,TM3,TM4; 2196 Mat coarse_phi_D,coarse_phi_B; 2197 Mat coarse_psi_D,coarse_psi_B; 2198 Mat A_II,A_BB,A_IB,A_BI; 2199 Mat C_B,CPHI; 2200 IS is_dummy; 2201 Vec mones; 2202 MatType checkmattype=MATSEQAIJ; 2203 PetscReal real_value; 2204 2205 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2206 Mat A; 2207 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2208 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2209 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2210 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2211 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2212 ierr = MatDestroy(&A);CHKERRQ(ierr); 2213 } else { 2214 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2215 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2216 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2217 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2218 } 2219 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2220 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2221 if (!pcbddc->symmetric_primal) { 2222 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2223 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2224 } 2225 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2226 2227 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2228 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2229 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2230 if (!pcbddc->symmetric_primal) { 2231 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2232 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2233 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2234 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2235 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2236 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2237 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2238 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2239 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2240 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2241 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2242 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2243 } else { 2244 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2245 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2246 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2247 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2248 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2249 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2250 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2251 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2252 } 2253 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2254 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2255 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2256 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2257 if (pcbddc->benign_n) { 2258 Mat B0_B,B0_BPHI; 2259 PetscScalar *data,*data2; 2260 PetscInt j; 2261 2262 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2263 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2264 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2265 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2266 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2267 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2268 for (j=0;j<pcbddc->benign_n;j++) { 2269 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2270 for (i=0;i<pcbddc->local_primal_size;i++) { 2271 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2272 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2273 } 2274 } 2275 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2276 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2277 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2278 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2279 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2280 } 2281 #if 0 2282 { 2283 PetscViewer viewer; 2284 char filename[256]; 2285 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2286 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2287 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2288 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2289 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2290 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2291 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2292 if (save_change) { 2293 Mat phi_B; 2294 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2295 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2296 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2297 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2298 } else { 2299 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2300 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2301 } 2302 if (pcbddc->coarse_phi_D) { 2303 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2304 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2305 } 2306 if (pcbddc->coarse_psi_B) { 2307 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2308 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2309 } 2310 if (pcbddc->coarse_psi_D) { 2311 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2312 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2313 } 2314 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2315 } 2316 #endif 2317 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2318 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2319 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2321 2322 /* check constraints */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2325 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2326 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2327 } else { 2328 PetscScalar *data; 2329 Mat tmat; 2330 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2332 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2333 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2334 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2335 } 2336 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2337 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2338 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2339 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2341 if (!pcbddc->symmetric_primal) { 2342 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2343 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2344 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2345 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2347 } 2348 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2349 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2350 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2351 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2352 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2353 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2354 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2355 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2356 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2357 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2358 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2359 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2360 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2361 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2362 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2363 if (!pcbddc->symmetric_primal) { 2364 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2365 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2366 } 2367 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2368 } 2369 /* get back data */ 2370 *coarse_submat_vals_n = coarse_submat_vals; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2376 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2377 { 2378 Mat *work_mat; 2379 IS isrow_s,iscol_s; 2380 PetscBool rsorted,csorted; 2381 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2386 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2387 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2388 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2389 2390 if (!rsorted) { 2391 const PetscInt *idxs; 2392 PetscInt *idxs_sorted,i; 2393 2394 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2395 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2396 for (i=0;i<rsize;i++) { 2397 idxs_perm_r[i] = i; 2398 } 2399 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2400 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2401 for (i=0;i<rsize;i++) { 2402 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2403 } 2404 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2405 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2406 } else { 2407 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2408 isrow_s = isrow; 2409 } 2410 2411 if (!csorted) { 2412 if (isrow == iscol) { 2413 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2414 iscol_s = isrow_s; 2415 } else { 2416 const PetscInt *idxs; 2417 PetscInt *idxs_sorted,i; 2418 2419 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2420 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2421 for (i=0;i<csize;i++) { 2422 idxs_perm_c[i] = i; 2423 } 2424 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2425 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2426 for (i=0;i<csize;i++) { 2427 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2428 } 2429 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2430 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2431 } 2432 } else { 2433 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2434 iscol_s = iscol; 2435 } 2436 2437 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2438 2439 if (!rsorted || !csorted) { 2440 Mat new_mat; 2441 IS is_perm_r,is_perm_c; 2442 2443 if (!rsorted) { 2444 PetscInt *idxs_r,i; 2445 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2446 for (i=0;i<rsize;i++) { 2447 idxs_r[idxs_perm_r[i]] = i; 2448 } 2449 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2450 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2451 } else { 2452 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2453 } 2454 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2455 2456 if (!csorted) { 2457 if (isrow_s == iscol_s) { 2458 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2459 is_perm_c = is_perm_r; 2460 } else { 2461 PetscInt *idxs_c,i; 2462 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2463 for (i=0;i<csize;i++) { 2464 idxs_c[idxs_perm_c[i]] = i; 2465 } 2466 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2467 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2468 } 2469 } else { 2470 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2471 } 2472 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2473 2474 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2475 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2476 work_mat[0] = new_mat; 2477 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2478 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2479 } 2480 2481 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2482 *B = work_mat[0]; 2483 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2484 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2485 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2491 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2492 { 2493 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2494 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2495 Mat new_mat; 2496 IS is_local,is_global; 2497 PetscInt local_size; 2498 PetscBool isseqaij; 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2503 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2504 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2505 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2506 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2507 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2508 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2509 2510 /* check */ 2511 if (pcbddc->dbg_flag) { 2512 Vec x,x_change; 2513 PetscReal error; 2514 2515 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2516 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2517 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2518 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2521 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2522 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2523 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2524 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2526 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2527 ierr = VecDestroy(&x);CHKERRQ(ierr); 2528 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2529 } 2530 2531 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2532 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2533 if (isseqaij) { 2534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2535 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2536 } else { 2537 Mat work_mat; 2538 2539 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2540 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2541 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2542 } 2543 if (matis->A->symmetric_set) { 2544 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2545 #if !defined(PETSC_USE_COMPLEX) 2546 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2547 #endif 2548 } 2549 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2555 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2556 { 2557 PC_IS* pcis = (PC_IS*)(pc->data); 2558 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2559 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2560 PetscInt *idx_R_local=NULL; 2561 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2562 PetscInt vbs,bs; 2563 PetscBT bitmask=NULL; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 /* 2568 No need to setup local scatters if 2569 - primal space is unchanged 2570 AND 2571 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2572 AND 2573 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2574 */ 2575 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2576 PetscFunctionReturn(0); 2577 } 2578 /* destroy old objects */ 2579 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2580 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2581 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2582 /* Set Non-overlapping dimensions */ 2583 n_B = pcis->n_B; 2584 n_D = pcis->n - n_B; 2585 n_vertices = pcbddc->n_vertices; 2586 2587 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2588 2589 /* create auxiliary bitmask and allocate workspace */ 2590 if (!sub_schurs->reuse_solver) { 2591 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2592 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2593 for (i=0;i<n_vertices;i++) { 2594 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2595 } 2596 2597 for (i=0, n_R=0; i<pcis->n; i++) { 2598 if (!PetscBTLookup(bitmask,i)) { 2599 idx_R_local[n_R++] = i; 2600 } 2601 } 2602 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2603 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2604 2605 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2606 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2607 } 2608 2609 /* Block code */ 2610 vbs = 1; 2611 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2612 if (bs>1 && !(n_vertices%bs)) { 2613 PetscBool is_blocked = PETSC_TRUE; 2614 PetscInt *vary; 2615 if (!sub_schurs->reuse_solver) { 2616 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2617 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2618 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2619 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2620 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2621 for (i=0; i<pcis->n/bs; i++) { 2622 if (vary[i]!=0 && vary[i]!=bs) { 2623 is_blocked = PETSC_FALSE; 2624 break; 2625 } 2626 } 2627 ierr = PetscFree(vary);CHKERRQ(ierr); 2628 } else { 2629 /* Verify directly the R set */ 2630 for (i=0; i<n_R/bs; i++) { 2631 PetscInt j,node=idx_R_local[bs*i]; 2632 for (j=1; j<bs; j++) { 2633 if (node != idx_R_local[bs*i+j]-j) { 2634 is_blocked = PETSC_FALSE; 2635 break; 2636 } 2637 } 2638 } 2639 } 2640 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2641 vbs = bs; 2642 for (i=0;i<n_R/vbs;i++) { 2643 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2644 } 2645 } 2646 } 2647 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2648 if (sub_schurs->reuse_solver) { 2649 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2650 2651 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2652 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2653 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2654 reuse_solver->is_R = pcbddc->is_R_local; 2655 } else { 2656 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2657 } 2658 2659 /* print some info if requested */ 2660 if (pcbddc->dbg_flag) { 2661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2662 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2663 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2664 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2666 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2668 } 2669 2670 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2671 if (!sub_schurs->reuse_solver) { 2672 IS is_aux1,is_aux2; 2673 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2674 2675 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2676 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2677 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2678 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2679 for (i=0; i<n_D; i++) { 2680 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2681 } 2682 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2683 for (i=0, j=0; i<n_R; i++) { 2684 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2685 aux_array1[j++] = i; 2686 } 2687 } 2688 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2689 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2690 for (i=0, j=0; i<n_B; i++) { 2691 if (!PetscBTLookup(bitmask,is_indices[i])) { 2692 aux_array2[j++] = i; 2693 } 2694 } 2695 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2696 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2697 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2698 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2699 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2700 2701 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2702 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2703 for (i=0, j=0; i<n_R; i++) { 2704 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2705 aux_array1[j++] = i; 2706 } 2707 } 2708 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2709 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2710 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2711 } 2712 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2713 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2714 } else { 2715 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2716 IS tis; 2717 PetscInt schur_size; 2718 2719 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2720 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2721 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2722 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2724 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2725 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2726 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2727 } 2728 } 2729 PetscFunctionReturn(0); 2730 } 2731 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2735 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2736 { 2737 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2738 PC_IS *pcis = (PC_IS*)pc->data; 2739 PC pc_temp; 2740 Mat A_RR; 2741 MatReuse reuse; 2742 PetscScalar m_one = -1.0; 2743 PetscReal value; 2744 PetscInt n_D,n_R; 2745 PetscBool use_exact,use_exact_reduced,issbaij; 2746 PetscErrorCode ierr; 2747 /* prefixes stuff */ 2748 char dir_prefix[256],neu_prefix[256],str_level[16]; 2749 size_t len; 2750 2751 PetscFunctionBegin; 2752 2753 /* compute prefixes */ 2754 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2755 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2756 if (!pcbddc->current_level) { 2757 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2758 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2759 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2760 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2761 } else { 2762 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2763 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2764 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2765 len -= 15; /* remove "pc_bddc_coarse_" */ 2766 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2767 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2768 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2769 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2770 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2771 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2772 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2773 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2774 } 2775 2776 /* DIRICHLET PROBLEM */ 2777 if (dirichlet) { 2778 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2779 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2780 if (!sub_schurs->reuse_solver) { 2781 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2782 } 2783 if (pcbddc->dbg_flag) { 2784 Mat A_IIn; 2785 2786 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2787 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2788 pcis->A_II = A_IIn; 2789 } 2790 } 2791 if (pcbddc->local_mat->symmetric_set) { 2792 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2793 } 2794 /* Matrix for Dirichlet problem is pcis->A_II */ 2795 n_D = pcis->n - pcis->n_B; 2796 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2797 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2798 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2799 /* default */ 2800 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2801 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2802 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2803 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2804 if (issbaij) { 2805 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2806 } else { 2807 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2808 } 2809 /* Allow user's customization */ 2810 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2811 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2812 } 2813 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2814 if (sub_schurs->reuse_solver) { 2815 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2816 2817 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2818 } 2819 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2820 if (!n_D) { 2821 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2822 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2823 } 2824 /* Set Up KSP for Dirichlet problem of BDDC */ 2825 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2826 /* set ksp_D into pcis data */ 2827 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2828 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2829 pcis->ksp_D = pcbddc->ksp_D; 2830 } 2831 2832 /* NEUMANN PROBLEM */ 2833 A_RR = 0; 2834 if (neumann) { 2835 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2836 PetscInt ibs,mbs; 2837 PetscBool issbaij; 2838 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2839 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2840 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2841 if (pcbddc->ksp_R) { /* already created ksp */ 2842 PetscInt nn_R; 2843 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2844 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2845 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2846 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2847 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2848 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2849 reuse = MAT_INITIAL_MATRIX; 2850 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2851 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2852 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2853 reuse = MAT_INITIAL_MATRIX; 2854 } else { /* safe to reuse the matrix */ 2855 reuse = MAT_REUSE_MATRIX; 2856 } 2857 } 2858 /* last check */ 2859 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2860 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2861 reuse = MAT_INITIAL_MATRIX; 2862 } 2863 } else { /* first time, so we need to create the matrix */ 2864 reuse = MAT_INITIAL_MATRIX; 2865 } 2866 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2867 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2868 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2869 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2870 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2871 if (matis->A == pcbddc->local_mat) { 2872 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2873 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2874 } else { 2875 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2876 } 2877 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2878 if (matis->A == pcbddc->local_mat) { 2879 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2880 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2881 } else { 2882 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2883 } 2884 } 2885 /* extract A_RR */ 2886 if (sub_schurs->reuse_solver) { 2887 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2888 2889 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2890 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2891 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2892 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2893 } else { 2894 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2895 } 2896 } else { 2897 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2898 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2899 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2900 } 2901 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2902 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2903 } 2904 if (pcbddc->local_mat->symmetric_set) { 2905 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2906 } 2907 if (!pcbddc->ksp_R) { /* create object if not present */ 2908 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2909 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2910 /* default */ 2911 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2912 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2913 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2914 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2915 if (issbaij) { 2916 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2917 } else { 2918 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2919 } 2920 /* Allow user's customization */ 2921 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2922 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2923 } 2924 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2925 if (!n_R) { 2926 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2927 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2928 } 2929 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2930 /* Reuse solver if it is present */ 2931 if (sub_schurs->reuse_solver) { 2932 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2933 2934 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2935 } 2936 /* Set Up KSP for Neumann problem of BDDC */ 2937 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2938 } 2939 2940 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2941 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2942 if (pcbddc->dbg_flag) { 2943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2944 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2946 } 2947 if (dirichlet) { /* Dirichlet */ 2948 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2949 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2950 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2951 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2952 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2953 /* need to be adapted? */ 2954 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2955 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2956 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2957 /* print info */ 2958 if (pcbddc->dbg_flag) { 2959 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2961 } 2962 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2963 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2964 } 2965 } 2966 if (neumann) { /* Neumann */ 2967 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2968 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2969 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2970 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2971 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2972 /* need to be adapted? */ 2973 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2974 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2975 /* print info */ 2976 if (pcbddc->dbg_flag) { 2977 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2979 } 2980 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2981 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2982 } 2983 } 2984 } 2985 /* free Neumann problem's matrix */ 2986 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2992 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2993 { 2994 PetscErrorCode ierr; 2995 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2996 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2997 2998 PetscFunctionBegin; 2999 if (!sub_schurs->reuse_solver) { 3000 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3001 } 3002 if (!pcbddc->switch_static) { 3003 if (applytranspose && pcbddc->local_auxmat1) { 3004 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3005 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3006 } 3007 if (!sub_schurs->reuse_solver) { 3008 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3009 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3010 } else { 3011 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3012 3013 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3014 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3015 } 3016 } else { 3017 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3018 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3019 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3020 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3021 if (applytranspose && pcbddc->local_auxmat1) { 3022 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3023 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3024 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3025 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3026 } 3027 } 3028 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3029 if (applytranspose) { 3030 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3031 } else { 3032 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3033 } 3034 } else { 3035 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3036 3037 if (applytranspose) { 3038 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3039 } else { 3040 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3041 } 3042 } 3043 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3044 if (!pcbddc->switch_static) { 3045 if (!sub_schurs->reuse_solver) { 3046 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3048 } else { 3049 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3050 3051 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3052 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3053 } 3054 if (!applytranspose && pcbddc->local_auxmat1) { 3055 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3056 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3057 } 3058 } else { 3059 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3060 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3061 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3062 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 if (!applytranspose && pcbddc->local_auxmat1) { 3064 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3065 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3066 } 3067 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3068 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3070 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 3075 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3078 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3079 { 3080 PetscErrorCode ierr; 3081 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3082 PC_IS* pcis = (PC_IS*) (pc->data); 3083 const PetscScalar zero = 0.0; 3084 3085 PetscFunctionBegin; 3086 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3087 if (!pcbddc->benign_apply_coarse_only) { 3088 if (applytranspose) { 3089 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3090 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3091 } else { 3092 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3093 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3094 } 3095 } else { 3096 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3097 } 3098 3099 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3100 if (pcbddc->benign_n) { 3101 PetscScalar *array; 3102 PetscInt j; 3103 3104 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3105 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3106 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3107 } 3108 3109 /* start communications from local primal nodes to rhs of coarse solver */ 3110 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3111 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3112 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3113 3114 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3115 /* TODO remove null space when doing multilevel */ 3116 if (pcbddc->coarse_ksp) { 3117 Mat coarse_mat; 3118 Vec rhs,sol; 3119 MatNullSpace nullsp; 3120 3121 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3122 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3123 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3124 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3125 if (nullsp) { 3126 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3127 } 3128 if (applytranspose) { 3129 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3130 } else { 3131 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3132 } 3133 if (nullsp) { 3134 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3135 } 3136 } 3137 3138 /* Local solution on R nodes */ 3139 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3140 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3141 } 3142 /* communications from coarse sol to local primal nodes */ 3143 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3144 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3145 3146 /* Sum contributions from the two levels */ 3147 if (!pcbddc->benign_apply_coarse_only) { 3148 if (applytranspose) { 3149 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3150 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3151 } else { 3152 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3153 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3154 } 3155 /* store p0 */ 3156 if (pcbddc->benign_n) { 3157 PetscScalar *array; 3158 PetscInt j; 3159 3160 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3161 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3162 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3163 } 3164 } else { /* expand the coarse solution */ 3165 if (applytranspose) { 3166 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3167 } else { 3168 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3169 } 3170 } 3171 PetscFunctionReturn(0); 3172 } 3173 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3176 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3177 { 3178 PetscErrorCode ierr; 3179 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3180 PetscScalar *array; 3181 Vec from,to; 3182 3183 PetscFunctionBegin; 3184 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3185 from = pcbddc->coarse_vec; 3186 to = pcbddc->vec1_P; 3187 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3188 Vec tvec; 3189 3190 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3191 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3192 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3193 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3194 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3195 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3196 } 3197 } else { /* from local to global -> put data in coarse right hand side */ 3198 from = pcbddc->vec1_P; 3199 to = pcbddc->coarse_vec; 3200 } 3201 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3207 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3208 { 3209 PetscErrorCode ierr; 3210 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3211 PetscScalar *array; 3212 Vec from,to; 3213 3214 PetscFunctionBegin; 3215 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3216 from = pcbddc->coarse_vec; 3217 to = pcbddc->vec1_P; 3218 } else { /* from local to global -> put data in coarse right hand side */ 3219 from = pcbddc->vec1_P; 3220 to = pcbddc->coarse_vec; 3221 } 3222 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3223 if (smode == SCATTER_FORWARD) { 3224 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3225 Vec tvec; 3226 3227 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3228 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3229 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3230 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3231 } 3232 } else { 3233 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3234 ierr = VecResetArray(from);CHKERRQ(ierr); 3235 } 3236 } 3237 PetscFunctionReturn(0); 3238 } 3239 3240 /* uncomment for testing purposes */ 3241 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3244 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3245 { 3246 PetscErrorCode ierr; 3247 PC_IS* pcis = (PC_IS*)(pc->data); 3248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3249 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3250 /* one and zero */ 3251 PetscScalar one=1.0,zero=0.0; 3252 /* space to store constraints and their local indices */ 3253 PetscScalar *constraints_data; 3254 PetscInt *constraints_idxs,*constraints_idxs_B; 3255 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3256 PetscInt *constraints_n; 3257 /* iterators */ 3258 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3259 /* BLAS integers */ 3260 PetscBLASInt lwork,lierr; 3261 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3262 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3263 /* reuse */ 3264 PetscInt olocal_primal_size,olocal_primal_size_cc; 3265 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3266 /* change of basis */ 3267 PetscBool qr_needed; 3268 PetscBT change_basis,qr_needed_idx; 3269 /* auxiliary stuff */ 3270 PetscInt *nnz,*is_indices; 3271 PetscInt ncc; 3272 /* some quantities */ 3273 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3274 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3275 3276 PetscFunctionBegin; 3277 /* Destroy Mat objects computed previously */ 3278 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3279 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3280 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3281 /* save info on constraints from previous setup (if any) */ 3282 olocal_primal_size = pcbddc->local_primal_size; 3283 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3284 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3285 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3286 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3287 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3288 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3289 3290 /* print some info */ 3291 if (pcbddc->dbg_flag) { 3292 IS vertices; 3293 PetscInt nv,nedges,nfaces; 3294 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 3295 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 3296 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 3297 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 3301 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 3302 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3303 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3304 } 3305 3306 if (!pcbddc->adaptive_selection) { 3307 IS ISForVertices,*ISForFaces,*ISForEdges; 3308 MatNullSpace nearnullsp; 3309 const Vec *nearnullvecs; 3310 Vec *localnearnullsp; 3311 PetscScalar *array; 3312 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3313 PetscBool nnsp_has_cnst; 3314 /* LAPACK working arrays for SVD or POD */ 3315 PetscBool skip_lapack,boolforchange; 3316 PetscScalar *work; 3317 PetscReal *singular_vals; 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscReal *rwork; 3320 #endif 3321 #if defined(PETSC_MISSING_LAPACK_GESVD) 3322 PetscScalar *temp_basis,*correlation_mat; 3323 #else 3324 PetscBLASInt dummy_int=1; 3325 PetscScalar dummy_scalar=1.; 3326 #endif 3327 3328 /* Get index sets for faces, edges and vertices from graph */ 3329 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3330 /* free unneeded index sets */ 3331 if (!pcbddc->use_vertices) { 3332 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3333 } 3334 if (!pcbddc->use_edges) { 3335 for (i=0;i<n_ISForEdges;i++) { 3336 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3337 } 3338 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3339 n_ISForEdges = 0; 3340 } 3341 if (!pcbddc->use_faces) { 3342 for (i=0;i<n_ISForFaces;i++) { 3343 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3344 } 3345 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3346 n_ISForFaces = 0; 3347 } 3348 3349 #if defined(PETSC_USE_DEBUG) 3350 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3351 Also use_change_of_basis should be consistent among processors */ 3352 if (pcbddc->NullSpace) { 3353 PetscBool tbool[2],gbool[2]; 3354 3355 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3356 pcbddc->use_change_of_basis = PETSC_TRUE; 3357 if (!ISForEdges) { 3358 pcbddc->use_change_on_faces = PETSC_TRUE; 3359 } 3360 } 3361 tbool[0] = pcbddc->use_change_of_basis; 3362 tbool[1] = pcbddc->use_change_on_faces; 3363 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3364 pcbddc->use_change_of_basis = gbool[0]; 3365 pcbddc->use_change_on_faces = gbool[1]; 3366 } 3367 #endif 3368 3369 /* check if near null space is attached to global mat */ 3370 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3371 if (nearnullsp) { 3372 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3373 /* remove any stored info */ 3374 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3375 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3376 /* store information for BDDC solver reuse */ 3377 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3378 pcbddc->onearnullspace = nearnullsp; 3379 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3380 for (i=0;i<nnsp_size;i++) { 3381 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3382 } 3383 } else { /* if near null space is not provided BDDC uses constants by default */ 3384 nnsp_size = 0; 3385 nnsp_has_cnst = PETSC_TRUE; 3386 } 3387 /* get max number of constraints on a single cc */ 3388 max_constraints = nnsp_size; 3389 if (nnsp_has_cnst) max_constraints++; 3390 3391 /* 3392 Evaluate maximum storage size needed by the procedure 3393 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3394 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3395 There can be multiple constraints per connected component 3396 */ 3397 n_vertices = 0; 3398 if (ISForVertices) { 3399 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3400 } 3401 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3402 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3403 3404 total_counts = n_ISForFaces+n_ISForEdges; 3405 total_counts *= max_constraints; 3406 total_counts += n_vertices; 3407 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3408 3409 total_counts = 0; 3410 max_size_of_constraint = 0; 3411 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3412 IS used_is; 3413 if (i<n_ISForEdges) { 3414 used_is = ISForEdges[i]; 3415 } else { 3416 used_is = ISForFaces[i-n_ISForEdges]; 3417 } 3418 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3419 total_counts += j; 3420 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3421 } 3422 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3423 3424 /* get local part of global near null space vectors */ 3425 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3426 for (k=0;k<nnsp_size;k++) { 3427 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3428 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3429 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3430 } 3431 3432 /* whether or not to skip lapack calls */ 3433 skip_lapack = PETSC_TRUE; 3434 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3435 3436 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3437 if (!skip_lapack) { 3438 PetscScalar temp_work; 3439 3440 #if defined(PETSC_MISSING_LAPACK_GESVD) 3441 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3442 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3443 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3444 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3445 #if defined(PETSC_USE_COMPLEX) 3446 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3447 #endif 3448 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3449 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3450 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3451 lwork = -1; 3452 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3453 #if !defined(PETSC_USE_COMPLEX) 3454 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3455 #else 3456 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3457 #endif 3458 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3459 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3460 #else /* on missing GESVD */ 3461 /* SVD */ 3462 PetscInt max_n,min_n; 3463 max_n = max_size_of_constraint; 3464 min_n = max_constraints; 3465 if (max_size_of_constraint < max_constraints) { 3466 min_n = max_size_of_constraint; 3467 max_n = max_constraints; 3468 } 3469 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3470 #if defined(PETSC_USE_COMPLEX) 3471 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3472 #endif 3473 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3474 lwork = -1; 3475 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3476 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3477 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3478 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3479 #if !defined(PETSC_USE_COMPLEX) 3480 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3481 #else 3482 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3483 #endif 3484 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3485 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3486 #endif /* on missing GESVD */ 3487 /* Allocate optimal workspace */ 3488 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3489 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3490 } 3491 /* Now we can loop on constraining sets */ 3492 total_counts = 0; 3493 constraints_idxs_ptr[0] = 0; 3494 constraints_data_ptr[0] = 0; 3495 /* vertices */ 3496 if (n_vertices) { 3497 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3498 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3499 for (i=0;i<n_vertices;i++) { 3500 constraints_n[total_counts] = 1; 3501 constraints_data[total_counts] = 1.0; 3502 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3503 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3504 total_counts++; 3505 } 3506 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3507 n_vertices = total_counts; 3508 } 3509 3510 /* edges and faces */ 3511 total_counts_cc = total_counts; 3512 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3513 IS used_is; 3514 PetscBool idxs_copied = PETSC_FALSE; 3515 3516 if (ncc<n_ISForEdges) { 3517 used_is = ISForEdges[ncc]; 3518 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3519 } else { 3520 used_is = ISForFaces[ncc-n_ISForEdges]; 3521 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3522 } 3523 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3524 3525 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3526 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3527 /* change of basis should not be performed on local periodic nodes */ 3528 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3529 if (nnsp_has_cnst) { 3530 PetscScalar quad_value; 3531 3532 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3533 idxs_copied = PETSC_TRUE; 3534 3535 if (!pcbddc->use_nnsp_true) { 3536 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3537 } else { 3538 quad_value = 1.0; 3539 } 3540 for (j=0;j<size_of_constraint;j++) { 3541 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3542 } 3543 temp_constraints++; 3544 total_counts++; 3545 } 3546 for (k=0;k<nnsp_size;k++) { 3547 PetscReal real_value; 3548 PetscScalar *ptr_to_data; 3549 3550 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3551 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3552 for (j=0;j<size_of_constraint;j++) { 3553 ptr_to_data[j] = array[is_indices[j]]; 3554 } 3555 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3556 /* check if array is null on the connected component */ 3557 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3558 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3559 if (real_value > 0.0) { /* keep indices and values */ 3560 temp_constraints++; 3561 total_counts++; 3562 if (!idxs_copied) { 3563 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3564 idxs_copied = PETSC_TRUE; 3565 } 3566 } 3567 } 3568 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3569 valid_constraints = temp_constraints; 3570 if (!pcbddc->use_nnsp_true && temp_constraints) { 3571 if (temp_constraints == 1) { /* just normalize the constraint */ 3572 PetscScalar norm,*ptr_to_data; 3573 3574 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3576 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3577 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3578 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3579 } else { /* perform SVD */ 3580 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3581 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3582 3583 #if defined(PETSC_MISSING_LAPACK_GESVD) 3584 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3585 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3586 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3587 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3588 from that computed using LAPACKgesvd 3589 -> This is due to a different computation of eigenvectors in LAPACKheev 3590 -> The quality of the POD-computed basis will be the same */ 3591 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3592 /* Store upper triangular part of correlation matrix */ 3593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3594 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3595 for (j=0;j<temp_constraints;j++) { 3596 for (k=0;k<j+1;k++) { 3597 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3598 } 3599 } 3600 /* compute eigenvalues and eigenvectors of correlation matrix */ 3601 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3602 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3603 #if !defined(PETSC_USE_COMPLEX) 3604 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3605 #else 3606 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3607 #endif 3608 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3609 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3610 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3611 j = 0; 3612 while (j < temp_constraints && singular_vals[j] < tol) j++; 3613 total_counts = total_counts-j; 3614 valid_constraints = temp_constraints-j; 3615 /* scale and copy POD basis into used quadrature memory */ 3616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3617 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3618 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3619 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3620 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3622 if (j<temp_constraints) { 3623 PetscInt ii; 3624 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3626 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3628 for (k=0;k<temp_constraints-j;k++) { 3629 for (ii=0;ii<size_of_constraint;ii++) { 3630 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3631 } 3632 } 3633 } 3634 #else /* on missing GESVD */ 3635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3636 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3638 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3639 #if !defined(PETSC_USE_COMPLEX) 3640 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3641 #else 3642 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3643 #endif 3644 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3646 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3647 k = temp_constraints; 3648 if (k > size_of_constraint) k = size_of_constraint; 3649 j = 0; 3650 while (j < k && singular_vals[k-j-1] < tol) j++; 3651 valid_constraints = k-j; 3652 total_counts = total_counts-temp_constraints+valid_constraints; 3653 #endif /* on missing GESVD */ 3654 } 3655 } 3656 /* update pointers information */ 3657 if (valid_constraints) { 3658 constraints_n[total_counts_cc] = valid_constraints; 3659 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3660 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3661 /* set change_of_basis flag */ 3662 if (boolforchange) { 3663 PetscBTSet(change_basis,total_counts_cc); 3664 } 3665 total_counts_cc++; 3666 } 3667 } 3668 /* free workspace */ 3669 if (!skip_lapack) { 3670 ierr = PetscFree(work);CHKERRQ(ierr); 3671 #if defined(PETSC_USE_COMPLEX) 3672 ierr = PetscFree(rwork);CHKERRQ(ierr); 3673 #endif 3674 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3675 #if defined(PETSC_MISSING_LAPACK_GESVD) 3676 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3677 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3678 #endif 3679 } 3680 for (k=0;k<nnsp_size;k++) { 3681 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3682 } 3683 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3684 /* free index sets of faces, edges and vertices */ 3685 for (i=0;i<n_ISForFaces;i++) { 3686 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3687 } 3688 if (n_ISForFaces) { 3689 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3690 } 3691 for (i=0;i<n_ISForEdges;i++) { 3692 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3693 } 3694 if (n_ISForEdges) { 3695 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3696 } 3697 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3698 } else { 3699 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3700 3701 total_counts = 0; 3702 n_vertices = 0; 3703 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3704 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3705 } 3706 max_constraints = 0; 3707 total_counts_cc = 0; 3708 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3709 total_counts += pcbddc->adaptive_constraints_n[i]; 3710 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3711 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3712 } 3713 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3714 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3715 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3716 constraints_data = pcbddc->adaptive_constraints_data; 3717 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3718 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3719 total_counts_cc = 0; 3720 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3721 if (pcbddc->adaptive_constraints_n[i]) { 3722 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3723 } 3724 } 3725 #if 0 3726 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3727 for (i=0;i<total_counts_cc;i++) { 3728 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3729 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3730 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3731 printf(" %d",constraints_idxs[j]); 3732 } 3733 printf("\n"); 3734 printf("number of cc: %d\n",constraints_n[i]); 3735 } 3736 for (i=0;i<n_vertices;i++) { 3737 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3738 } 3739 for (i=0;i<sub_schurs->n_subs;i++) { 3740 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3741 } 3742 #endif 3743 3744 max_size_of_constraint = 0; 3745 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3746 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3747 /* Change of basis */ 3748 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3749 if (pcbddc->use_change_of_basis) { 3750 for (i=0;i<sub_schurs->n_subs;i++) { 3751 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3752 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3753 } 3754 } 3755 } 3756 } 3757 pcbddc->local_primal_size = total_counts; 3758 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3759 3760 /* map constraints_idxs in boundary numbering */ 3761 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3762 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3763 3764 /* Create constraint matrix */ 3765 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3766 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3767 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3768 3769 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3770 /* determine if a QR strategy is needed for change of basis */ 3771 qr_needed = PETSC_FALSE; 3772 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3773 total_primal_vertices=0; 3774 pcbddc->local_primal_size_cc = 0; 3775 for (i=0;i<total_counts_cc;i++) { 3776 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3777 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3778 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3779 pcbddc->local_primal_size_cc += 1; 3780 } else if (PetscBTLookup(change_basis,i)) { 3781 for (k=0;k<constraints_n[i];k++) { 3782 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3783 } 3784 pcbddc->local_primal_size_cc += constraints_n[i]; 3785 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3786 PetscBTSet(qr_needed_idx,i); 3787 qr_needed = PETSC_TRUE; 3788 } 3789 } else { 3790 pcbddc->local_primal_size_cc += 1; 3791 } 3792 } 3793 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3794 pcbddc->n_vertices = total_primal_vertices; 3795 /* permute indices in order to have a sorted set of vertices */ 3796 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3797 3798 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3799 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3800 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3801 3802 /* nonzero structure of constraint matrix */ 3803 /* and get reference dof for local constraints */ 3804 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3805 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3806 3807 j = total_primal_vertices; 3808 total_counts = total_primal_vertices; 3809 cum = total_primal_vertices; 3810 for (i=n_vertices;i<total_counts_cc;i++) { 3811 if (!PetscBTLookup(change_basis,i)) { 3812 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3813 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3814 cum++; 3815 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3816 for (k=0;k<constraints_n[i];k++) { 3817 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3818 nnz[j+k] = size_of_constraint; 3819 } 3820 j += constraints_n[i]; 3821 } 3822 } 3823 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3824 ierr = PetscFree(nnz);CHKERRQ(ierr); 3825 3826 /* set values in constraint matrix */ 3827 for (i=0;i<total_primal_vertices;i++) { 3828 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3829 } 3830 total_counts = total_primal_vertices; 3831 for (i=n_vertices;i<total_counts_cc;i++) { 3832 if (!PetscBTLookup(change_basis,i)) { 3833 PetscInt *cols; 3834 3835 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3836 cols = constraints_idxs+constraints_idxs_ptr[i]; 3837 for (k=0;k<constraints_n[i];k++) { 3838 PetscInt row = total_counts+k; 3839 PetscScalar *vals; 3840 3841 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3842 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3843 } 3844 total_counts += constraints_n[i]; 3845 } 3846 } 3847 /* assembling */ 3848 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3849 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3850 3851 /* 3852 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3853 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3854 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3855 */ 3856 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3857 if (pcbddc->use_change_of_basis) { 3858 /* dual and primal dofs on a single cc */ 3859 PetscInt dual_dofs,primal_dofs; 3860 /* working stuff for GEQRF */ 3861 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3862 PetscBLASInt lqr_work; 3863 /* working stuff for UNGQR */ 3864 PetscScalar *gqr_work,lgqr_work_t; 3865 PetscBLASInt lgqr_work; 3866 /* working stuff for TRTRS */ 3867 PetscScalar *trs_rhs; 3868 PetscBLASInt Blas_NRHS; 3869 /* pointers for values insertion into change of basis matrix */ 3870 PetscInt *start_rows,*start_cols; 3871 PetscScalar *start_vals; 3872 /* working stuff for values insertion */ 3873 PetscBT is_primal; 3874 PetscInt *aux_primal_numbering_B; 3875 /* matrix sizes */ 3876 PetscInt global_size,local_size; 3877 /* temporary change of basis */ 3878 Mat localChangeOfBasisMatrix; 3879 /* extra space for debugging */ 3880 PetscScalar *dbg_work; 3881 3882 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3883 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3884 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3885 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3886 /* nonzeros for local mat */ 3887 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3888 if (!pcbddc->benign_change || pcbddc->fake_change) { 3889 for (i=0;i<pcis->n;i++) nnz[i]=1; 3890 } else { 3891 const PetscInt *ii; 3892 PetscInt n; 3893 PetscBool flg_row; 3894 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3895 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3896 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3897 } 3898 for (i=n_vertices;i<total_counts_cc;i++) { 3899 if (PetscBTLookup(change_basis,i)) { 3900 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3901 if (PetscBTLookup(qr_needed_idx,i)) { 3902 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3903 } else { 3904 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3905 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3906 } 3907 } 3908 } 3909 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3910 ierr = PetscFree(nnz);CHKERRQ(ierr); 3911 /* Set interior change in the matrix */ 3912 if (!pcbddc->benign_change || pcbddc->fake_change) { 3913 for (i=0;i<pcis->n;i++) { 3914 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3915 } 3916 } else { 3917 const PetscInt *ii,*jj; 3918 PetscScalar *aa; 3919 PetscInt n; 3920 PetscBool flg_row; 3921 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3922 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3923 for (i=0;i<n;i++) { 3924 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3925 } 3926 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3927 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3928 } 3929 3930 if (pcbddc->dbg_flag) { 3931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3933 } 3934 3935 3936 /* Now we loop on the constraints which need a change of basis */ 3937 /* 3938 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3939 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3940 3941 Basic blocks of change of basis matrix T computed by 3942 3943 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3944 3945 | 1 0 ... 0 s_1/S | 3946 | 0 1 ... 0 s_2/S | 3947 | ... | 3948 | 0 ... 1 s_{n-1}/S | 3949 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3950 3951 with S = \sum_{i=1}^n s_i^2 3952 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3953 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3954 3955 - QR decomposition of constraints otherwise 3956 */ 3957 if (qr_needed) { 3958 /* space to store Q */ 3959 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3960 /* first we issue queries for optimal work */ 3961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3963 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3964 lqr_work = -1; 3965 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3967 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3968 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3969 lgqr_work = -1; 3970 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3973 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3974 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3975 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3976 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3977 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3978 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3979 /* array to store scaling factors for reflectors */ 3980 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3981 /* array to store rhs and solution of triangular solver */ 3982 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3983 /* allocating workspace for check */ 3984 if (pcbddc->dbg_flag) { 3985 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3986 } 3987 } 3988 /* array to store whether a node is primal or not */ 3989 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3990 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3991 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3992 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 3993 for (i=0;i<total_primal_vertices;i++) { 3994 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3995 } 3996 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3997 3998 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3999 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4000 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4001 if (PetscBTLookup(change_basis,total_counts)) { 4002 /* get constraint info */ 4003 primal_dofs = constraints_n[total_counts]; 4004 dual_dofs = size_of_constraint-primal_dofs; 4005 4006 if (pcbddc->dbg_flag) { 4007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4008 } 4009 4010 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4011 4012 /* copy quadrature constraints for change of basis check */ 4013 if (pcbddc->dbg_flag) { 4014 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4015 } 4016 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4017 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4018 4019 /* compute QR decomposition of constraints */ 4020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4021 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4023 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4024 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4025 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4026 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4027 4028 /* explictly compute R^-T */ 4029 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4030 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4031 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4032 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4033 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4034 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4035 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4036 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4037 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4038 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4039 4040 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4043 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4045 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4046 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4047 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4049 4050 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4051 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4052 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4054 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4055 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4057 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4059 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4060 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4061 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4062 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4063 4064 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4065 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4066 /* insert cols for primal dofs */ 4067 for (j=0;j<primal_dofs;j++) { 4068 start_vals = &qr_basis[j*size_of_constraint]; 4069 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4070 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4071 } 4072 /* insert cols for dual dofs */ 4073 for (j=0,k=0;j<dual_dofs;k++) { 4074 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4075 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4076 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4077 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4078 j++; 4079 } 4080 } 4081 4082 /* check change of basis */ 4083 if (pcbddc->dbg_flag) { 4084 PetscInt ii,jj; 4085 PetscBool valid_qr=PETSC_TRUE; 4086 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4091 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4092 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4093 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4094 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4095 for (jj=0;jj<size_of_constraint;jj++) { 4096 for (ii=0;ii<primal_dofs;ii++) { 4097 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4098 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4099 } 4100 } 4101 if (!valid_qr) { 4102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4103 for (jj=0;jj<size_of_constraint;jj++) { 4104 for (ii=0;ii<primal_dofs;ii++) { 4105 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4106 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4107 } 4108 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4109 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4110 } 4111 } 4112 } 4113 } else { 4114 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4115 } 4116 } 4117 } else { /* simple transformation block */ 4118 PetscInt row,col; 4119 PetscScalar val,norm; 4120 4121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4122 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4123 for (j=0;j<size_of_constraint;j++) { 4124 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4125 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4126 if (!PetscBTLookup(is_primal,row_B)) { 4127 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4128 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4129 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4130 } else { 4131 for (k=0;k<size_of_constraint;k++) { 4132 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4133 if (row != col) { 4134 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4135 } else { 4136 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4137 } 4138 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4139 } 4140 } 4141 } 4142 if (pcbddc->dbg_flag) { 4143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4144 } 4145 } 4146 } else { 4147 if (pcbddc->dbg_flag) { 4148 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4149 } 4150 } 4151 } 4152 4153 /* free workspace */ 4154 if (qr_needed) { 4155 if (pcbddc->dbg_flag) { 4156 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4157 } 4158 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4159 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4160 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4161 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4162 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4163 } 4164 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4165 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4166 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4167 4168 /* assembling of global change of variable */ 4169 if (!pcbddc->fake_change) { 4170 Mat tmat; 4171 PetscInt bs; 4172 4173 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4174 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4175 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4176 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4177 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4178 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4179 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4180 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4181 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4182 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4183 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4184 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4185 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4186 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4187 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4188 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4189 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4190 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4191 4192 /* check */ 4193 if (pcbddc->dbg_flag) { 4194 PetscReal error; 4195 Vec x,x_change; 4196 4197 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4198 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4199 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4200 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4201 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4202 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4203 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4204 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4205 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4206 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4207 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4208 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4210 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4211 ierr = VecDestroy(&x);CHKERRQ(ierr); 4212 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4213 } 4214 /* adapt sub_schurs computed (if any) */ 4215 if (pcbddc->use_deluxe_scaling) { 4216 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4217 4218 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4219 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4220 } 4221 if (sub_schurs->S_Ej_all) { 4222 Mat S_new,tmat; 4223 IS is_all_N,is_V_Sall = NULL; 4224 4225 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4226 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4227 if (pcbddc->deluxe_zerorows) { 4228 ISLocalToGlobalMapping NtoSall; 4229 IS is_V; 4230 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4231 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4232 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4233 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4234 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4235 } 4236 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4237 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4238 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4239 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4240 if (pcbddc->deluxe_zerorows) { 4241 const PetscScalar *array; 4242 const PetscInt *idxs_V,*idxs_all; 4243 PetscInt i,n_V; 4244 4245 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4246 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4247 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4248 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4249 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4250 for (i=0;i<n_V;i++) { 4251 PetscScalar val; 4252 PetscInt idx; 4253 4254 idx = idxs_V[i]; 4255 val = array[idxs_all[idxs_V[i]]]; 4256 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4257 } 4258 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4259 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4260 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4261 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4262 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4263 } 4264 sub_schurs->S_Ej_all = S_new; 4265 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4266 if (sub_schurs->sum_S_Ej_all) { 4267 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4268 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4269 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4270 if (pcbddc->deluxe_zerorows) { 4271 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4272 } 4273 sub_schurs->sum_S_Ej_all = S_new; 4274 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4275 } 4276 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4277 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4278 } 4279 /* destroy any change of basis context in sub_schurs */ 4280 if (sub_schurs->change) { 4281 PetscInt i; 4282 4283 for (i=0;i<sub_schurs->n_subs;i++) { 4284 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4285 } 4286 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4287 } 4288 } 4289 if (pcbddc->switch_static) { /* need to save the local change */ 4290 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4291 } else { 4292 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4293 } 4294 /* determine if any process has changed the pressures locally */ 4295 if (pcbddc->benign_saddle_point) { 4296 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4297 ierr = MPI_Allreduce(&have_null,&pcbddc->change_interior,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4298 } 4299 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4300 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4301 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4302 pcbddc->use_qr_single = qr_needed; 4303 } 4304 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4305 PetscBool needglobal = PETSC_FALSE; 4306 if (pcbddc->benign_saddle_point) { 4307 PetscBool have_null = (PetscBool)!!pcbddc->benign_change; 4308 ierr = MPI_Allreduce(&have_null,&needglobal,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4309 } 4310 if (!needglobal && pcbddc->user_ChangeOfBasisMatrix) { 4311 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4312 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4313 } else { 4314 Mat benign_global = NULL; 4315 if (needglobal) { 4316 Mat tmat; 4317 4318 pcbddc->change_interior = PETSC_TRUE; 4319 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4320 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4321 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4322 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4323 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4324 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4325 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4326 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4327 if (pcbddc->benign_change) { 4328 Mat M; 4329 4330 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4331 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4332 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4333 ierr = MatDestroy(&M);CHKERRQ(ierr); 4334 } else { 4335 Mat eye; 4336 PetscScalar *array; 4337 4338 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4339 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4340 for (i=0;i<pcis->n;i++) { 4341 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4342 } 4343 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4344 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4345 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4346 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4347 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4348 } 4349 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4350 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4351 } 4352 if (pcbddc->user_ChangeOfBasisMatrix) { 4353 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4354 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4355 } else if (needglobal) { 4356 pcbddc->ChangeOfBasisMatrix = benign_global; 4357 } 4358 } 4359 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4360 IS is_global; 4361 const PetscInt *gidxs; 4362 4363 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4364 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4365 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4366 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4367 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4368 } 4369 } 4370 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4371 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4372 } 4373 4374 if (!pcbddc->fake_change) { 4375 /* add pressure dofs to set of primal nodes for numbering purposes */ 4376 for (i=0;i<pcbddc->benign_n;i++) { 4377 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4378 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4379 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4380 pcbddc->local_primal_size_cc++; 4381 pcbddc->local_primal_size++; 4382 } 4383 4384 /* check if a new primal space has been introduced (also take into account benign trick) */ 4385 pcbddc->new_primal_space_local = PETSC_TRUE; 4386 if (olocal_primal_size == pcbddc->local_primal_size) { 4387 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4388 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4389 if (!pcbddc->new_primal_space_local) { 4390 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4391 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4392 } 4393 } 4394 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4395 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4396 } 4397 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4398 4399 /* flush dbg viewer */ 4400 if (pcbddc->dbg_flag) { 4401 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4402 } 4403 4404 /* free workspace */ 4405 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4406 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4407 if (!pcbddc->adaptive_selection) { 4408 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4409 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4410 } else { 4411 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4412 pcbddc->adaptive_constraints_idxs_ptr, 4413 pcbddc->adaptive_constraints_data_ptr, 4414 pcbddc->adaptive_constraints_idxs, 4415 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4416 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4417 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4418 } 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4424 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4425 { 4426 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4427 PC_IS *pcis = (PC_IS*)pc->data; 4428 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4429 PetscInt ierr,i,N; 4430 PetscViewer viewer=pcbddc->dbg_viewer; 4431 4432 PetscFunctionBegin; 4433 /* Reset previously computed graph */ 4434 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4435 /* Init local Graph struct */ 4436 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4437 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4438 4439 /* Check validity of the csr graph passed in by the user */ 4440 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4441 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4442 } 4443 4444 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4445 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4446 PetscInt *xadj,*adjncy; 4447 PetscInt nvtxs; 4448 PetscBool flg_row=PETSC_FALSE; 4449 4450 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4451 if (flg_row) { 4452 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4453 pcbddc->computed_rowadj = PETSC_TRUE; 4454 } 4455 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4456 } 4457 if (pcbddc->dbg_flag) { 4458 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4459 } 4460 4461 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4462 if (pcbddc->user_provided_isfordofs) { 4463 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4464 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4465 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4466 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4467 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4468 } 4469 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4470 pcbddc->n_ISForDofs = 0; 4471 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4472 } 4473 } else { 4474 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4475 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4476 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4477 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4478 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4479 } 4480 } 4481 } 4482 4483 /* Setup of Graph */ 4484 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4485 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4486 } 4487 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4488 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4489 } 4490 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4491 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4492 } 4493 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4494 4495 /* attach info on disconnected subdomains if present */ 4496 if (pcbddc->n_local_subs) { 4497 PetscInt *local_subs; 4498 4499 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4500 for (i=0;i<pcbddc->n_local_subs;i++) { 4501 const PetscInt *idxs; 4502 PetscInt nl,j; 4503 4504 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4505 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4506 for (j=0;j<nl;j++) { 4507 local_subs[idxs[j]] = i; 4508 } 4509 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4510 } 4511 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4512 pcbddc->mat_graph->local_subs = local_subs; 4513 } 4514 4515 /* Graph's connected components analysis */ 4516 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4517 4518 /* print some info to stdout */ 4519 if (pcbddc->dbg_flag) { 4520 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 4521 } 4522 PetscFunctionReturn(0); 4523 } 4524 4525 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4526 #undef __FUNCT__ 4527 #define __FUNCT__ "PCBDDCSubsetNumbering" 4528 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4529 { 4530 PetscSF sf; 4531 PetscLayout map; 4532 const PetscInt *idxs; 4533 PetscInt *leaf_data,*root_data,*gidxs; 4534 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4535 PetscInt n_n,nlocals,start,first_index; 4536 PetscMPIInt commsize; 4537 PetscBool first_found; 4538 PetscErrorCode ierr; 4539 4540 PetscFunctionBegin; 4541 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4542 if (subset_mult) { 4543 PetscCheckSameComm(subset,1,subset_mult,2); 4544 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4545 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4546 } 4547 /* create workspace layout for computing global indices of subset */ 4548 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4549 lbounds[0] = lbounds[1] = 0; 4550 for (i=0;i<n;i++) { 4551 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4552 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4553 } 4554 lbounds[0] = -lbounds[0]; 4555 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4556 gbounds[0] = -gbounds[0]; 4557 N = gbounds[1] - gbounds[0] + 1; 4558 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4559 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4560 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4561 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4562 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4563 4564 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4565 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4566 if (subset_mult) { 4567 const PetscInt* idxs_mult; 4568 4569 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4570 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4571 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4572 } else { 4573 for (i=0;i<n;i++) leaf_data[i] = 1; 4574 } 4575 /* local size of new subset */ 4576 n_n = 0; 4577 for (i=0;i<n;i++) n_n += leaf_data[i]; 4578 4579 /* global indexes in layout */ 4580 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4581 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4582 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4583 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4584 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4585 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4586 4587 /* reduce from leaves to roots */ 4588 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4589 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4590 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4591 4592 /* count indexes in local part of layout */ 4593 nlocals = 0; 4594 first_index = -1; 4595 first_found = PETSC_FALSE; 4596 for (i=0;i<Nl;i++) { 4597 if (!first_found && root_data[i]) { 4598 first_found = PETSC_TRUE; 4599 first_index = i; 4600 } 4601 nlocals += root_data[i]; 4602 } 4603 4604 /* cumulative of number of indexes and size of subset without holes */ 4605 #if defined(PETSC_HAVE_MPI_EXSCAN) 4606 start = 0; 4607 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4608 #else 4609 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4610 start = start-nlocals; 4611 #endif 4612 4613 if (N_n) { /* compute total size of new subset if requested */ 4614 *N_n = start + nlocals; 4615 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4616 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4617 } 4618 4619 /* adapt root data with cumulative */ 4620 if (first_found) { 4621 PetscInt old_index; 4622 4623 root_data[first_index] += start; 4624 old_index = first_index; 4625 for (i=first_index+1;i<Nl;i++) { 4626 if (root_data[i]) { 4627 root_data[i] += root_data[old_index]; 4628 old_index = i; 4629 } 4630 } 4631 } 4632 4633 /* from roots to leaves */ 4634 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4635 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4636 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4637 4638 /* create new IS with global indexes without holes */ 4639 if (subset_mult) { 4640 const PetscInt* idxs_mult; 4641 PetscInt cum; 4642 4643 cum = 0; 4644 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4645 for (i=0;i<n;i++) { 4646 PetscInt j; 4647 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4648 } 4649 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4650 } else { 4651 for (i=0;i<n;i++) { 4652 gidxs[i] = leaf_data[i]-1; 4653 } 4654 } 4655 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4656 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4657 PetscFunctionReturn(0); 4658 } 4659 4660 #undef __FUNCT__ 4661 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4662 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4663 { 4664 PetscInt i,j; 4665 PetscScalar *alphas; 4666 PetscErrorCode ierr; 4667 4668 PetscFunctionBegin; 4669 /* this implements stabilized Gram-Schmidt */ 4670 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4671 for (i=0;i<n;i++) { 4672 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4673 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4674 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4675 } 4676 ierr = PetscFree(alphas);CHKERRQ(ierr); 4677 PetscFunctionReturn(0); 4678 } 4679 4680 #undef __FUNCT__ 4681 #define __FUNCT__ "MatISGetSubassemblingPattern" 4682 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4683 { 4684 IS ranks_send_to; 4685 PetscInt n_neighs,*neighs,*n_shared,**shared; 4686 PetscMPIInt size,rank,color; 4687 PetscInt *xadj,*adjncy; 4688 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4689 PetscInt i,local_size,threshold=0; 4690 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4691 PetscSubcomm subcomm; 4692 PetscErrorCode ierr; 4693 4694 PetscFunctionBegin; 4695 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4696 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4697 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4698 4699 /* Get info on mapping */ 4700 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4701 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4702 4703 /* build local CSR graph of subdomains' connectivity */ 4704 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4705 xadj[0] = 0; 4706 xadj[1] = PetscMax(n_neighs-1,0); 4707 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4708 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4709 4710 if (threshold) { 4711 PetscInt xadj_count = 0; 4712 for (i=1;i<n_neighs;i++) { 4713 if (n_shared[i] > threshold) { 4714 adjncy[xadj_count] = neighs[i]; 4715 adjncy_wgt[xadj_count] = n_shared[i]; 4716 xadj_count++; 4717 } 4718 } 4719 xadj[1] = xadj_count; 4720 } else { 4721 if (xadj[1]) { 4722 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4723 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4724 } 4725 } 4726 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4727 if (use_square) { 4728 for (i=0;i<xadj[1];i++) { 4729 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4730 } 4731 } 4732 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4733 4734 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4735 4736 /* 4737 Restrict work on active processes only. 4738 */ 4739 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4740 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4741 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4742 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4743 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4744 if (color) { 4745 ierr = PetscFree(xadj);CHKERRQ(ierr); 4746 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4747 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4748 } else { 4749 Mat subdomain_adj; 4750 IS new_ranks,new_ranks_contig; 4751 MatPartitioning partitioner; 4752 PetscInt prank,rstart=0,rend=0; 4753 PetscInt *is_indices,*oldranks; 4754 PetscBool aggregate; 4755 4756 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4757 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4758 prank = rank; 4759 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4760 /* 4761 for (i=0;i<size;i++) { 4762 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4763 } 4764 */ 4765 for (i=0;i<xadj[1];i++) { 4766 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4767 } 4768 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4769 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4770 if (aggregate) { 4771 PetscInt lrows,row,ncols,*cols; 4772 PetscMPIInt nrank; 4773 PetscScalar *vals; 4774 4775 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4776 lrows = 0; 4777 if (nrank<redprocs) { 4778 lrows = size/redprocs; 4779 if (nrank<size%redprocs) lrows++; 4780 } 4781 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4782 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4783 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4784 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4785 row = nrank; 4786 ncols = xadj[1]-xadj[0]; 4787 cols = adjncy; 4788 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4789 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4790 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4791 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4792 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4793 ierr = PetscFree(xadj);CHKERRQ(ierr); 4794 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4795 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4796 ierr = PetscFree(vals);CHKERRQ(ierr); 4797 } else { 4798 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4799 } 4800 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4801 4802 /* Partition */ 4803 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4804 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4805 if (use_vwgt) { 4806 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4807 v_wgt[0] = local_size; 4808 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4809 } 4810 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4811 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4812 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4813 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4814 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4815 4816 /* renumber new_ranks to avoid "holes" in new set of processors */ 4817 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4818 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4819 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4820 if (!redprocs) { 4821 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4822 } else { 4823 PetscInt idxs[1]; 4824 PetscMPIInt tag; 4825 MPI_Request *reqs; 4826 4827 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4828 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4829 for (i=rstart;i<rend;i++) { 4830 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4831 } 4832 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4833 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4834 ierr = PetscFree(reqs);CHKERRQ(ierr); 4835 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4836 } 4837 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4838 /* clean up */ 4839 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4840 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4841 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4842 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4843 } 4844 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4845 4846 /* assemble parallel IS for sends */ 4847 i = 1; 4848 if (color) i=0; 4849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4850 /* get back IS */ 4851 *is_sends = ranks_send_to; 4852 PetscFunctionReturn(0); 4853 } 4854 4855 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4856 4857 #undef __FUNCT__ 4858 #define __FUNCT__ "MatISSubassemble" 4859 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4860 { 4861 Mat local_mat; 4862 IS is_sends_internal; 4863 PetscInt rows,cols,new_local_rows; 4864 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4865 PetscBool ismatis,isdense,newisdense,destroy_mat; 4866 ISLocalToGlobalMapping l2gmap; 4867 PetscInt* l2gmap_indices; 4868 const PetscInt* is_indices; 4869 MatType new_local_type; 4870 /* buffers */ 4871 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4872 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4873 PetscInt *recv_buffer_idxs_local; 4874 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4875 /* MPI */ 4876 MPI_Comm comm,comm_n; 4877 PetscSubcomm subcomm; 4878 PetscMPIInt n_sends,n_recvs,commsize; 4879 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4880 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4881 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4882 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4883 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4884 PetscErrorCode ierr; 4885 4886 PetscFunctionBegin; 4887 /* TODO: add missing checks */ 4888 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4889 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4890 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4891 PetscValidLogicalCollectiveInt(mat,nis,7); 4892 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4893 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4894 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4895 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4896 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4897 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4898 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4899 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4900 PetscInt mrows,mcols,mnrows,mncols; 4901 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4902 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4903 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4904 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4905 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4906 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4907 } 4908 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4909 PetscValidLogicalCollectiveInt(mat,bs,0); 4910 /* prepare IS for sending if not provided */ 4911 if (!is_sends) { 4912 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4913 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4914 } else { 4915 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4916 is_sends_internal = is_sends; 4917 } 4918 4919 /* get comm */ 4920 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4921 4922 /* compute number of sends */ 4923 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4924 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4925 4926 /* compute number of receives */ 4927 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4928 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4929 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4930 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4931 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4932 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4933 ierr = PetscFree(iflags);CHKERRQ(ierr); 4934 4935 /* restrict comm if requested */ 4936 subcomm = 0; 4937 destroy_mat = PETSC_FALSE; 4938 if (restrict_comm) { 4939 PetscMPIInt color,subcommsize; 4940 4941 color = 0; 4942 if (restrict_full) { 4943 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4944 } else { 4945 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4946 } 4947 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4948 subcommsize = commsize - subcommsize; 4949 /* check if reuse has been requested */ 4950 if (reuse == MAT_REUSE_MATRIX) { 4951 if (*mat_n) { 4952 PetscMPIInt subcommsize2; 4953 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4954 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4955 comm_n = PetscObjectComm((PetscObject)*mat_n); 4956 } else { 4957 comm_n = PETSC_COMM_SELF; 4958 } 4959 } else { /* MAT_INITIAL_MATRIX */ 4960 PetscMPIInt rank; 4961 4962 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4963 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4964 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4965 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4966 comm_n = PetscSubcommChild(subcomm); 4967 } 4968 /* flag to destroy *mat_n if not significative */ 4969 if (color) destroy_mat = PETSC_TRUE; 4970 } else { 4971 comm_n = comm; 4972 } 4973 4974 /* prepare send/receive buffers */ 4975 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4976 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4977 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4978 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4979 if (nis) { 4980 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4981 } 4982 4983 /* Get data from local matrices */ 4984 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4985 /* TODO: See below some guidelines on how to prepare the local buffers */ 4986 /* 4987 send_buffer_vals should contain the raw values of the local matrix 4988 send_buffer_idxs should contain: 4989 - MatType_PRIVATE type 4990 - PetscInt size_of_l2gmap 4991 - PetscInt global_row_indices[size_of_l2gmap] 4992 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4993 */ 4994 else { 4995 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4996 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4997 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4998 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4999 send_buffer_idxs[1] = i; 5000 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5001 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5002 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5003 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5004 for (i=0;i<n_sends;i++) { 5005 ilengths_vals[is_indices[i]] = len*len; 5006 ilengths_idxs[is_indices[i]] = len+2; 5007 } 5008 } 5009 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5010 /* additional is (if any) */ 5011 if (nis) { 5012 PetscMPIInt psum; 5013 PetscInt j; 5014 for (j=0,psum=0;j<nis;j++) { 5015 PetscInt plen; 5016 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5017 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5018 psum += len+1; /* indices + lenght */ 5019 } 5020 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5021 for (j=0,psum=0;j<nis;j++) { 5022 PetscInt plen; 5023 const PetscInt *is_array_idxs; 5024 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5025 send_buffer_idxs_is[psum] = plen; 5026 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5027 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5028 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5029 psum += plen+1; /* indices + lenght */ 5030 } 5031 for (i=0;i<n_sends;i++) { 5032 ilengths_idxs_is[is_indices[i]] = psum; 5033 } 5034 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5035 } 5036 5037 buf_size_idxs = 0; 5038 buf_size_vals = 0; 5039 buf_size_idxs_is = 0; 5040 for (i=0;i<n_recvs;i++) { 5041 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5042 buf_size_vals += (PetscInt)olengths_vals[i]; 5043 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5044 } 5045 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5046 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5047 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5048 5049 /* get new tags for clean communications */ 5050 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5051 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5052 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5053 5054 /* allocate for requests */ 5055 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5056 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5057 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5058 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5059 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5060 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5061 5062 /* communications */ 5063 ptr_idxs = recv_buffer_idxs; 5064 ptr_vals = recv_buffer_vals; 5065 ptr_idxs_is = recv_buffer_idxs_is; 5066 for (i=0;i<n_recvs;i++) { 5067 source_dest = onodes[i]; 5068 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5069 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5070 ptr_idxs += olengths_idxs[i]; 5071 ptr_vals += olengths_vals[i]; 5072 if (nis) { 5073 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5074 ptr_idxs_is += olengths_idxs_is[i]; 5075 } 5076 } 5077 for (i=0;i<n_sends;i++) { 5078 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5079 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5080 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5081 if (nis) { 5082 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5083 } 5084 } 5085 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5086 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5087 5088 /* assemble new l2g map */ 5089 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5090 ptr_idxs = recv_buffer_idxs; 5091 new_local_rows = 0; 5092 for (i=0;i<n_recvs;i++) { 5093 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5094 ptr_idxs += olengths_idxs[i]; 5095 } 5096 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5097 ptr_idxs = recv_buffer_idxs; 5098 new_local_rows = 0; 5099 for (i=0;i<n_recvs;i++) { 5100 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5101 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5102 ptr_idxs += olengths_idxs[i]; 5103 } 5104 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5105 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5106 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5107 5108 /* infer new local matrix type from received local matrices type */ 5109 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5110 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5111 if (n_recvs) { 5112 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5113 ptr_idxs = recv_buffer_idxs; 5114 for (i=0;i<n_recvs;i++) { 5115 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5116 new_local_type_private = MATAIJ_PRIVATE; 5117 break; 5118 } 5119 ptr_idxs += olengths_idxs[i]; 5120 } 5121 switch (new_local_type_private) { 5122 case MATDENSE_PRIVATE: 5123 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5124 new_local_type = MATSEQAIJ; 5125 bs = 1; 5126 } else { /* if I receive only 1 dense matrix */ 5127 new_local_type = MATSEQDENSE; 5128 bs = 1; 5129 } 5130 break; 5131 case MATAIJ_PRIVATE: 5132 new_local_type = MATSEQAIJ; 5133 bs = 1; 5134 break; 5135 case MATBAIJ_PRIVATE: 5136 new_local_type = MATSEQBAIJ; 5137 break; 5138 case MATSBAIJ_PRIVATE: 5139 new_local_type = MATSEQSBAIJ; 5140 break; 5141 default: 5142 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5143 break; 5144 } 5145 } else { /* by default, new_local_type is seqdense */ 5146 new_local_type = MATSEQDENSE; 5147 bs = 1; 5148 } 5149 5150 /* create MATIS object if needed */ 5151 if (reuse == MAT_INITIAL_MATRIX) { 5152 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5153 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5154 } else { 5155 /* it also destroys the local matrices */ 5156 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5157 } 5158 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5159 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5160 5161 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5162 5163 /* Global to local map of received indices */ 5164 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5165 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5166 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5167 5168 /* restore attributes -> type of incoming data and its size */ 5169 buf_size_idxs = 0; 5170 for (i=0;i<n_recvs;i++) { 5171 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5172 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5173 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5174 } 5175 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5176 5177 /* set preallocation */ 5178 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5179 if (!newisdense) { 5180 PetscInt *new_local_nnz=0; 5181 5182 ptr_vals = recv_buffer_vals; 5183 ptr_idxs = recv_buffer_idxs_local; 5184 if (n_recvs) { 5185 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5186 } 5187 for (i=0;i<n_recvs;i++) { 5188 PetscInt j; 5189 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5190 for (j=0;j<*(ptr_idxs+1);j++) { 5191 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5192 } 5193 } else { 5194 /* TODO */ 5195 } 5196 ptr_idxs += olengths_idxs[i]; 5197 } 5198 if (new_local_nnz) { 5199 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5200 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5201 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5202 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5203 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5204 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5205 } else { 5206 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5207 } 5208 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5209 } else { 5210 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5211 } 5212 5213 /* set values */ 5214 ptr_vals = recv_buffer_vals; 5215 ptr_idxs = recv_buffer_idxs_local; 5216 for (i=0;i<n_recvs;i++) { 5217 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5218 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5219 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5220 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5221 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5222 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5223 } else { 5224 /* TODO */ 5225 } 5226 ptr_idxs += olengths_idxs[i]; 5227 ptr_vals += olengths_vals[i]; 5228 } 5229 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5230 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5231 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5232 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5233 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5234 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5235 5236 #if 0 5237 if (!restrict_comm) { /* check */ 5238 Vec lvec,rvec; 5239 PetscReal infty_error; 5240 5241 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5242 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5243 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5244 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5245 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5246 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5247 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5248 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5249 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5250 } 5251 #endif 5252 5253 /* assemble new additional is (if any) */ 5254 if (nis) { 5255 PetscInt **temp_idxs,*count_is,j,psum; 5256 5257 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5258 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5259 ptr_idxs = recv_buffer_idxs_is; 5260 psum = 0; 5261 for (i=0;i<n_recvs;i++) { 5262 for (j=0;j<nis;j++) { 5263 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5264 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5265 psum += plen; 5266 ptr_idxs += plen+1; /* shift pointer to received data */ 5267 } 5268 } 5269 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5270 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5271 for (i=1;i<nis;i++) { 5272 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5273 } 5274 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5275 ptr_idxs = recv_buffer_idxs_is; 5276 for (i=0;i<n_recvs;i++) { 5277 for (j=0;j<nis;j++) { 5278 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5279 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5280 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5281 ptr_idxs += plen+1; /* shift pointer to received data */ 5282 } 5283 } 5284 for (i=0;i<nis;i++) { 5285 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5286 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5287 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5288 } 5289 ierr = PetscFree(count_is);CHKERRQ(ierr); 5290 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5291 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5292 } 5293 /* free workspace */ 5294 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5295 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5296 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5297 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5298 if (isdense) { 5299 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5300 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5301 } else { 5302 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5303 } 5304 if (nis) { 5305 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5306 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5307 } 5308 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5309 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5310 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5311 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5312 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5313 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5314 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5315 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5316 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5317 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5318 ierr = PetscFree(onodes);CHKERRQ(ierr); 5319 if (nis) { 5320 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5321 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5322 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5323 } 5324 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5325 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5326 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5327 for (i=0;i<nis;i++) { 5328 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5329 } 5330 *mat_n = NULL; 5331 } 5332 PetscFunctionReturn(0); 5333 } 5334 5335 /* temporary hack into ksp private data structure */ 5336 #include <petsc/private/kspimpl.h> 5337 5338 #undef __FUNCT__ 5339 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5340 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5341 { 5342 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5343 PC_IS *pcis = (PC_IS*)pc->data; 5344 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5345 MatNullSpace CoarseNullSpace=NULL; 5346 ISLocalToGlobalMapping coarse_islg; 5347 IS coarse_is,*isarray; 5348 PetscInt i,im_active=-1,active_procs=-1; 5349 PetscInt nis,nisdofs,nisneu,nisvert; 5350 PC pc_temp; 5351 PCType coarse_pc_type; 5352 KSPType coarse_ksp_type; 5353 PetscBool multilevel_requested,multilevel_allowed; 5354 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5355 Mat t_coarse_mat_is; 5356 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 5357 PetscMPIInt all_procs; 5358 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 5359 PetscBool compute_vecs = PETSC_FALSE; 5360 PetscScalar *array; 5361 PetscErrorCode ierr; 5362 5363 PetscFunctionBegin; 5364 /* Assign global numbering to coarse dofs */ 5365 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5366 PetscInt ocoarse_size; 5367 compute_vecs = PETSC_TRUE; 5368 ocoarse_size = pcbddc->coarse_size; 5369 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5370 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5371 /* see if we can avoid some work */ 5372 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5373 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5374 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5375 PC pc; 5376 PetscBool isbddc; 5377 5378 /* temporary workaround since PCBDDC does not have a reset method so far */ 5379 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5380 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5381 if (isbddc) { 5382 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5383 } else { 5384 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5385 } 5386 coarse_reuse = PETSC_FALSE; 5387 } else { /* we can safely reuse already computed coarse matrix */ 5388 coarse_reuse = PETSC_TRUE; 5389 } 5390 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5391 coarse_reuse = PETSC_FALSE; 5392 } 5393 /* reset any subassembling information */ 5394 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5395 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5396 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5397 coarse_reuse = PETSC_TRUE; 5398 } 5399 5400 /* count "active" (i.e. with positive local size) and "void" processes */ 5401 im_active = !!(pcis->n); 5402 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5403 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 5404 void_procs = all_procs-active_procs; 5405 csin_type_simple = PETSC_TRUE; 5406 redist = PETSC_FALSE; 5407 if (pcbddc->current_level && void_procs) { 5408 csin_ml = PETSC_TRUE; 5409 ncoarse_ml = void_procs; 5410 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 5411 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 5412 csin_ds = PETSC_TRUE; 5413 ncoarse_ds = pcbddc->redistribute_coarse; 5414 redist = PETSC_TRUE; 5415 } else { 5416 csin_ds = PETSC_TRUE; 5417 ncoarse_ds = active_procs; 5418 redist = PETSC_TRUE; 5419 } 5420 } else { 5421 csin_ml = PETSC_FALSE; 5422 ncoarse_ml = all_procs; 5423 if (void_procs) { 5424 csin_ds = PETSC_TRUE; 5425 ncoarse_ds = void_procs; 5426 csin_type_simple = PETSC_FALSE; 5427 } else { 5428 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 5429 csin_ds = PETSC_TRUE; 5430 ncoarse_ds = pcbddc->redistribute_coarse; 5431 redist = PETSC_TRUE; 5432 } else { 5433 csin_ds = PETSC_FALSE; 5434 ncoarse_ds = all_procs; 5435 } 5436 } 5437 } 5438 5439 /* 5440 test if we can go multilevel: three conditions must be satisfied: 5441 - we have not exceeded the number of levels requested 5442 - we can actually subassemble the active processes 5443 - we can find a suitable number of MPI processes where we can place the subassembled problem 5444 */ 5445 multilevel_allowed = PETSC_FALSE; 5446 multilevel_requested = PETSC_FALSE; 5447 if (pcbddc->current_level < pcbddc->max_levels) { 5448 multilevel_requested = PETSC_TRUE; 5449 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 5450 multilevel_allowed = PETSC_FALSE; 5451 } else { 5452 multilevel_allowed = PETSC_TRUE; 5453 } 5454 } 5455 /* determine number of process partecipating to coarse solver */ 5456 if (multilevel_allowed) { 5457 ncoarse = ncoarse_ml; 5458 csin = csin_ml; 5459 redist = PETSC_FALSE; 5460 } else { 5461 ncoarse = ncoarse_ds; 5462 csin = csin_ds; 5463 } 5464 5465 /* creates temporary l2gmap and IS for coarse indexes */ 5466 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5467 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5468 5469 /* creates temporary MATIS object for coarse matrix */ 5470 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5471 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5472 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5473 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5474 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5475 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5476 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5477 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5478 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5479 5480 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5481 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 5482 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5483 const PetscInt *idxs; 5484 ISLocalToGlobalMapping tmap; 5485 5486 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5487 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5488 /* allocate space for temporary storage */ 5489 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5490 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5491 /* allocate for IS array */ 5492 nisdofs = pcbddc->n_ISForDofsLocal; 5493 nisneu = !!pcbddc->NeumannBoundariesLocal; 5494 nisvert = 0; 5495 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5496 nisvert = 1; 5497 } 5498 nis = nisdofs + nisneu + nisvert; 5499 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5500 /* dofs splitting */ 5501 for (i=0;i<nisdofs;i++) { 5502 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5503 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5504 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5505 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5506 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5507 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5509 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5510 } 5511 /* neumann boundaries */ 5512 if (pcbddc->NeumannBoundariesLocal) { 5513 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5514 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5515 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5516 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5517 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5518 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5519 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5520 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5521 } 5522 /* primal vertices (benign) */ 5523 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 5524 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 5525 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5526 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5527 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 5528 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5529 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 5530 } 5531 /* free memory */ 5532 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5533 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5534 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5535 } else { 5536 nis = 0; 5537 nisdofs = 0; 5538 nisneu = 0; 5539 nisvert = 0; 5540 isarray = NULL; 5541 } 5542 /* destroy no longer needed map */ 5543 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5544 5545 /* restrict on coarse candidates (if needed) */ 5546 coarse_mat_is = NULL; 5547 if (csin) { 5548 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5549 if (redist) { 5550 PetscMPIInt rank; 5551 PetscInt spc,n_spc_p1,dest[1],destsize; 5552 5553 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5554 spc = active_procs/ncoarse; 5555 n_spc_p1 = active_procs%ncoarse; 5556 if (im_active) { 5557 destsize = 1; 5558 if (rank > n_spc_p1*(spc+1)-1) { 5559 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5560 } else { 5561 dest[0] = rank/(spc+1); 5562 } 5563 } else { 5564 destsize = 0; 5565 } 5566 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5567 } else if (csin_type_simple) { 5568 PetscMPIInt rank; 5569 PetscInt issize,isidx; 5570 5571 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5572 if (im_active) { 5573 issize = 1; 5574 isidx = (PetscInt)rank; 5575 } else { 5576 issize = 0; 5577 isidx = -1; 5578 } 5579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5580 } else { /* get a suitable subassembling pattern from MATIS code */ 5581 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5582 } 5583 5584 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5585 if (!redist || ncoarse <= void_procs) { 5586 PetscInt ncoarse_cand,tissize,*nisindices; 5587 PetscInt *coarse_candidates; 5588 const PetscInt* tisindices; 5589 5590 /* get coarse candidates' ranks in pc communicator */ 5591 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5592 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5593 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5594 if (!coarse_candidates[i]) { 5595 coarse_candidates[ncoarse_cand++]=i; 5596 } 5597 } 5598 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5599 5600 5601 if (pcbddc->dbg_flag) { 5602 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5604 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5606 for (i=0;i<ncoarse_cand;i++) { 5607 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5608 } 5609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5610 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5611 } 5612 /* shift the pattern on coarse candidates */ 5613 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5614 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5615 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5616 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5617 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5618 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5619 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5620 } 5621 if (pcbddc->dbg_flag) { 5622 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5624 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5626 } 5627 } 5628 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5629 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5630 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5631 } else { /* this is the last level, so use just receiving processes in subcomm */ 5632 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5633 } 5634 } else { 5635 if (pcbddc->dbg_flag) { 5636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5639 } 5640 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5641 coarse_mat_is = t_coarse_mat_is; 5642 } 5643 5644 /* create local to global scatters for coarse problem */ 5645 if (compute_vecs) { 5646 PetscInt lrows; 5647 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5648 if (coarse_mat_is) { 5649 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5650 } else { 5651 lrows = 0; 5652 } 5653 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5654 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5655 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5656 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5657 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5658 } 5659 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5660 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5661 5662 /* set defaults for coarse KSP and PC */ 5663 if (multilevel_allowed) { 5664 coarse_ksp_type = KSPRICHARDSON; 5665 coarse_pc_type = PCBDDC; 5666 } else { 5667 coarse_ksp_type = KSPPREONLY; 5668 coarse_pc_type = PCREDUNDANT; 5669 } 5670 5671 /* print some info if requested */ 5672 if (pcbddc->dbg_flag) { 5673 if (!multilevel_allowed) { 5674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5675 if (multilevel_requested) { 5676 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5677 } else if (pcbddc->max_levels) { 5678 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5679 } 5680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5681 } 5682 } 5683 5684 /* create the coarse KSP object only once with defaults */ 5685 if (coarse_mat_is) { 5686 MatReuse coarse_mat_reuse; 5687 PetscViewer dbg_viewer = NULL; 5688 if (pcbddc->dbg_flag) { 5689 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5690 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5691 } 5692 if (!pcbddc->coarse_ksp) { 5693 char prefix[256],str_level[16]; 5694 size_t len; 5695 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5696 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5697 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5698 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5699 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5700 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5701 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5702 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5703 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5704 /* prefix */ 5705 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5706 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5707 if (!pcbddc->current_level) { 5708 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5709 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5710 } else { 5711 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5712 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5713 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5714 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5715 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5716 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5717 } 5718 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5719 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5720 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5721 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5722 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5723 /* allow user customization */ 5724 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5725 } 5726 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5727 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5728 if (nisdofs) { 5729 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5730 for (i=0;i<nisdofs;i++) { 5731 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5732 } 5733 } 5734 if (nisneu) { 5735 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5736 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5737 } 5738 if (nisvert) { 5739 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5740 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5741 } 5742 5743 /* get some info after set from options */ 5744 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5745 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5746 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5747 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5748 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5749 isbddc = PETSC_FALSE; 5750 } 5751 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5752 if (isredundant) { 5753 KSP inner_ksp; 5754 PC inner_pc; 5755 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5756 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5757 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5758 } 5759 5760 /* assemble coarse matrix */ 5761 if (coarse_reuse) { 5762 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5763 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5764 coarse_mat_reuse = MAT_REUSE_MATRIX; 5765 } else { 5766 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5767 } 5768 if (isbddc || isnn) { 5769 if (isbddc) { /* currently there are no APIs for these options */ 5770 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5771 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5772 pcbddc_coarse->benign_saddle_point = pcbddc->benign_saddle_point; 5773 pcbddc_coarse->benign_compute_nonetflux = pcbddc->benign_compute_nonetflux; 5774 if (pcbddc_coarse->benign_compute_nonetflux) { 5775 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5776 } 5777 } 5778 if (pcbddc->coarsening_ratio > 1) { 5779 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5780 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5781 if (pcbddc->dbg_flag) { 5782 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5783 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5784 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5785 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5786 } 5787 } 5788 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5789 } else { 5790 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5791 coarse_mat = coarse_mat_is; 5792 } 5793 } else { 5794 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5795 } 5796 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5797 5798 /* propagate symmetry info of coarse matrix */ 5799 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5800 if (pc->pmat->symmetric_set) { 5801 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5802 } 5803 if (pc->pmat->hermitian_set) { 5804 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5805 } 5806 if (pc->pmat->spd_set) { 5807 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5808 } 5809 /* set operators */ 5810 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5811 if (pcbddc->dbg_flag) { 5812 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5813 } 5814 } else { /* processes non partecipating to coarse solver (if any) */ 5815 coarse_mat = 0; 5816 } 5817 ierr = PetscFree(isarray);CHKERRQ(ierr); 5818 #if 0 5819 { 5820 PetscViewer viewer; 5821 char filename[256]; 5822 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5823 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5824 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5825 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5826 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5827 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5828 } 5829 #endif 5830 5831 /* Compute coarse null space (special handling by BDDC only) */ 5832 #if 0 5833 if (pcbddc->NullSpace) { 5834 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5835 } 5836 #endif 5837 /* hack */ 5838 if (pcbddc->coarse_ksp) { 5839 Vec crhs,csol; 5840 5841 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5842 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5843 if (!csol) { 5844 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5845 } 5846 if (!crhs) { 5847 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5848 } 5849 } 5850 5851 /* compute null space for coarse solver if the benign trick has been requested */ 5852 if (pcbddc->benign_null) { 5853 5854 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5855 for (i=0;i<pcbddc->benign_n;i++) { 5856 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5857 } 5858 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5859 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5860 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5861 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5862 if (coarse_mat) { 5863 Vec nullv; 5864 PetscScalar *array,*array2; 5865 PetscInt nl; 5866 5867 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5868 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5869 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5870 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5871 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5872 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5873 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5874 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5875 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5876 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5877 } 5878 } 5879 5880 if (pcbddc->coarse_ksp) { 5881 PetscBool ispreonly; 5882 5883 if (CoarseNullSpace) { 5884 PetscBool isnull; 5885 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5886 if (0) { 5887 if (isbddc && !pcbddc->benign_saddle_point) { 5888 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5889 } else { 5890 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5891 } 5892 } else { 5893 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5894 } 5895 } 5896 /* setup coarse ksp */ 5897 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5898 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5899 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5900 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5901 KSP check_ksp; 5902 KSPType check_ksp_type; 5903 PC check_pc; 5904 Vec check_vec,coarse_vec; 5905 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5906 PetscInt its; 5907 PetscBool compute_eigs; 5908 PetscReal *eigs_r,*eigs_c; 5909 PetscInt neigs; 5910 const char *prefix; 5911 5912 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5913 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5914 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5915 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5916 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5917 if (ispreonly) { 5918 check_ksp_type = KSPPREONLY; 5919 compute_eigs = PETSC_FALSE; 5920 } else { 5921 check_ksp_type = KSPGMRES; 5922 compute_eigs = PETSC_TRUE; 5923 } 5924 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5925 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5926 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5927 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5928 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5929 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5930 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5931 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5932 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5933 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5934 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5935 /* create random vec */ 5936 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5937 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5938 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5939 if (CoarseNullSpace) { 5940 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5941 } 5942 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5943 /* solve coarse problem */ 5944 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5945 if (CoarseNullSpace) { 5946 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5947 } 5948 /* set eigenvalue estimation if preonly has not been requested */ 5949 if (compute_eigs) { 5950 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5951 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5952 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5953 lambda_max = eigs_r[neigs-1]; 5954 lambda_min = eigs_r[0]; 5955 if (pcbddc->use_coarse_estimates) { 5956 if (lambda_max>lambda_min) { 5957 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5958 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5959 } 5960 } 5961 } 5962 5963 /* check coarse problem residual error */ 5964 if (pcbddc->dbg_flag) { 5965 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5966 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5967 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5968 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5969 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5970 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5971 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5972 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5973 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5974 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5975 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5976 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5977 if (CoarseNullSpace) { 5978 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5979 } 5980 if (compute_eigs) { 5981 PetscReal lambda_max_s,lambda_min_s; 5982 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5983 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5984 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5985 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5986 for (i=0;i<neigs;i++) { 5987 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5988 } 5989 } 5990 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5991 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5992 } 5993 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5994 if (compute_eigs) { 5995 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5996 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5997 } 5998 } 5999 } 6000 /* print additional info */ 6001 if (pcbddc->dbg_flag) { 6002 /* waits until all processes reaches this point */ 6003 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6006 } 6007 6008 /* free memory */ 6009 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6010 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6011 PetscFunctionReturn(0); 6012 } 6013 6014 #undef __FUNCT__ 6015 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6016 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6017 { 6018 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6019 PC_IS* pcis = (PC_IS*)pc->data; 6020 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6021 IS subset,subset_mult,subset_n; 6022 PetscInt local_size,coarse_size=0; 6023 PetscInt *local_primal_indices=NULL; 6024 const PetscInt *t_local_primal_indices; 6025 PetscErrorCode ierr; 6026 6027 PetscFunctionBegin; 6028 /* Compute global number of coarse dofs */ 6029 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6030 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6031 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6032 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6033 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6034 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6035 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6036 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6037 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6038 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6039 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6040 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6041 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6042 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6043 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6044 6045 /* check numbering */ 6046 if (pcbddc->dbg_flag) { 6047 PetscScalar coarsesum,*array,*array2; 6048 PetscInt i; 6049 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6050 6051 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6054 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6055 /* counter */ 6056 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6057 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6058 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6059 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6060 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6061 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6062 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6063 for (i=0;i<pcbddc->local_primal_size;i++) { 6064 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6065 } 6066 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6067 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6068 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6071 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6072 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6073 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6074 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6075 for (i=0;i<pcis->n;i++) { 6076 if (array[i] != 0.0 && array[i] != array2[i]) { 6077 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 6078 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6079 set_error = PETSC_TRUE; 6080 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 6081 } 6082 } 6083 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6084 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6086 for (i=0;i<pcis->n;i++) { 6087 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6088 } 6089 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6090 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6091 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6092 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6093 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6095 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6096 PetscInt *gidxs; 6097 6098 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6099 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6103 for (i=0;i<pcbddc->local_primal_size;i++) { 6104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6105 } 6106 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6107 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6108 } 6109 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6110 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6111 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6112 } 6113 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6114 /* get back data */ 6115 *coarse_size_n = coarse_size; 6116 *local_primal_indices_n = local_primal_indices; 6117 PetscFunctionReturn(0); 6118 } 6119 6120 #undef __FUNCT__ 6121 #define __FUNCT__ "PCBDDCGlobalToLocal" 6122 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6123 { 6124 IS localis_t; 6125 PetscInt i,lsize,*idxs,n; 6126 PetscScalar *vals; 6127 PetscErrorCode ierr; 6128 6129 PetscFunctionBegin; 6130 /* get indices in local ordering exploiting local to global map */ 6131 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6132 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6133 for (i=0;i<lsize;i++) vals[i] = 1.0; 6134 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6135 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6136 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6137 if (idxs) { /* multilevel guard */ 6138 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6139 } 6140 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6141 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6142 ierr = PetscFree(vals);CHKERRQ(ierr); 6143 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6144 /* now compute set in local ordering */ 6145 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6146 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6147 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6148 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6149 for (i=0,lsize=0;i<n;i++) { 6150 if (PetscRealPart(vals[i]) > 0.5) { 6151 lsize++; 6152 } 6153 } 6154 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6155 for (i=0,lsize=0;i<n;i++) { 6156 if (PetscRealPart(vals[i]) > 0.5) { 6157 idxs[lsize++] = i; 6158 } 6159 } 6160 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6161 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6162 *localis = localis_t; 6163 PetscFunctionReturn(0); 6164 } 6165 6166 #undef __FUNCT__ 6167 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6168 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6169 { 6170 PC_IS *pcis=(PC_IS*)pc->data; 6171 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6172 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6173 Mat S_j; 6174 PetscInt *used_xadj,*used_adjncy; 6175 PetscBool free_used_adj; 6176 PetscErrorCode ierr; 6177 6178 PetscFunctionBegin; 6179 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6180 free_used_adj = PETSC_FALSE; 6181 if (pcbddc->sub_schurs_layers == -1) { 6182 used_xadj = NULL; 6183 used_adjncy = NULL; 6184 } else { 6185 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6186 used_xadj = pcbddc->mat_graph->xadj; 6187 used_adjncy = pcbddc->mat_graph->adjncy; 6188 } else if (pcbddc->computed_rowadj) { 6189 used_xadj = pcbddc->mat_graph->xadj; 6190 used_adjncy = pcbddc->mat_graph->adjncy; 6191 } else { 6192 PetscBool flg_row=PETSC_FALSE; 6193 const PetscInt *xadj,*adjncy; 6194 PetscInt nvtxs; 6195 6196 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6197 if (flg_row) { 6198 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6199 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6200 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6201 free_used_adj = PETSC_TRUE; 6202 } else { 6203 pcbddc->sub_schurs_layers = -1; 6204 used_xadj = NULL; 6205 used_adjncy = NULL; 6206 } 6207 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6208 } 6209 } 6210 6211 /* setup sub_schurs data */ 6212 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6213 if (!sub_schurs->schur_explicit) { 6214 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6215 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6216 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6217 } else { 6218 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6219 PetscBool isseqaij; 6220 PetscInt benign_n; 6221 Mat change = NULL; 6222 IS change_primal = NULL; 6223 6224 if (!pcbddc->use_vertices && reuse_solvers) { 6225 PetscInt n_vertices; 6226 6227 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6228 reuse_solvers = (PetscBool)!n_vertices; 6229 } 6230 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6231 if (!isseqaij) { 6232 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6233 if (matis->A == pcbddc->local_mat) { 6234 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6235 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6236 } else { 6237 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6238 } 6239 } 6240 if (!pcbddc->benign_change_explicit) { 6241 benign_n = pcbddc->benign_n; 6242 } else { 6243 benign_n = 0; 6244 } 6245 /* If the user defines additional constraints, we import them here. 6246 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done, at the moment, without some hacking */ 6247 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6248 PC_IS *pcisf; 6249 PC_BDDC *pcbddcf; 6250 PC pcf; 6251 6252 if (pcbddc->sub_schurs_rebuild) { 6253 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6254 } 6255 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6256 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6257 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6258 /* hacks */ 6259 pcisf = (PC_IS*)pcf->data; 6260 pcisf->is_B_local = pcis->is_B_local; 6261 pcisf->vec1_N = pcis->vec1_N; 6262 pcisf->BtoNmap = pcis->BtoNmap; 6263 pcisf->n = pcis->n; 6264 pcisf->n_B = pcis->n_B; 6265 pcbddcf = (PC_BDDC*)pcf->data; 6266 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6267 pcbddcf->mat_graph = pcbddc->mat_graph; 6268 pcbddcf->use_faces = PETSC_TRUE; 6269 pcbddcf->use_change_of_basis = PETSC_TRUE; 6270 pcbddcf->use_change_on_faces = PETSC_TRUE; 6271 pcbddcf->use_qr_single = PETSC_TRUE; 6272 pcbddcf->fake_change = PETSC_TRUE; 6273 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6274 /* store information on primal vertices and change of basis (in local numbering) */ 6275 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6276 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6277 change = pcbddcf->ConstraintMatrix; 6278 pcbddcf->ConstraintMatrix = NULL; 6279 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6280 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6281 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6282 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6283 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6284 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6285 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6286 pcf->ops->destroy = NULL; 6287 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6288 } 6289 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6290 ierr = MatDestroy(&change);CHKERRQ(ierr); 6291 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6292 } 6293 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6294 6295 /* free adjacency */ 6296 if (free_used_adj) { 6297 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6298 } 6299 PetscFunctionReturn(0); 6300 } 6301 6302 #undef __FUNCT__ 6303 #define __FUNCT__ "PCBDDCInitSubSchurs" 6304 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6305 { 6306 PC_IS *pcis=(PC_IS*)pc->data; 6307 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6308 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6309 PCBDDCGraph graph; 6310 PetscErrorCode ierr; 6311 6312 PetscFunctionBegin; 6313 /* attach interface graph for determining subsets */ 6314 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6315 IS verticesIS,verticescomm; 6316 PetscInt vsize,*idxs; 6317 6318 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6319 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6320 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6321 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6322 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6323 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6324 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6325 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6326 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6327 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6328 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6329 /* 6330 if (pcbddc->dbg_flag) { 6331 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6332 } 6333 */ 6334 } else { 6335 graph = pcbddc->mat_graph; 6336 } 6337 6338 /* sub_schurs init */ 6339 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6340 6341 /* free graph struct */ 6342 if (pcbddc->sub_schurs_rebuild) { 6343 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6344 } 6345 PetscFunctionReturn(0); 6346 } 6347 6348 #undef __FUNCT__ 6349 #define __FUNCT__ "PCBDDCCheckOperator" 6350 PetscErrorCode PCBDDCCheckOperator(PC pc) 6351 { 6352 PC_IS *pcis=(PC_IS*)pc->data; 6353 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6354 PetscErrorCode ierr; 6355 6356 PetscFunctionBegin; 6357 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6358 IS zerodiag = NULL; 6359 Mat S_j,B0_B=NULL; 6360 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6361 PetscScalar *p0_check,*array,*array2; 6362 PetscReal norm; 6363 PetscInt i; 6364 6365 /* B0 and B0_B */ 6366 if (zerodiag) { 6367 IS dummy; 6368 6369 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6370 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6371 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6372 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6373 } 6374 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6375 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6376 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6377 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6378 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6379 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6380 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6381 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6382 /* S_j */ 6383 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6384 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6385 6386 /* mimic vector in \widetilde{W}_\Gamma */ 6387 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6388 /* continuous in primal space */ 6389 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6390 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6391 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6392 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6393 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6394 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6395 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6396 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6397 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6398 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6399 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6400 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6401 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6402 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6403 6404 /* assemble rhs for coarse problem */ 6405 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6406 /* local with Schur */ 6407 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6408 if (zerodiag) { 6409 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6410 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6411 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6412 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6413 } 6414 /* sum on primal nodes the local contributions */ 6415 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6416 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6417 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6418 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6419 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6420 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6421 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6422 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6423 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6424 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6425 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6426 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6427 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6428 /* scale primal nodes (BDDC sums contibutions) */ 6429 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6430 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6431 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6432 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6433 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6434 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6435 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6436 /* global: \widetilde{B0}_B w_\Gamma */ 6437 if (zerodiag) { 6438 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6439 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6440 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6441 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6442 } 6443 /* BDDC */ 6444 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6445 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6446 6447 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6448 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6449 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6450 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6451 for (i=0;i<pcbddc->benign_n;i++) { 6452 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6453 } 6454 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6455 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6456 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6457 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6458 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6459 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6460 } 6461 PetscFunctionReturn(0); 6462 } 6463