1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 9 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 10 { 11 PCBDDCBenignMatMult_ctx ctx; 12 PetscErrorCode ierr; 13 PetscBool apply_right,apply_left,reset_x; 14 15 PetscFunctionBegin; 16 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 17 if (transpose) { 18 apply_right = ctx->apply_left; 19 apply_left = ctx->apply_right; 20 } else { 21 apply_right = ctx->apply_right; 22 apply_left = ctx->apply_left; 23 } 24 reset_x = PETSC_FALSE; 25 if (apply_right) { 26 const PetscScalar *ax; 27 PetscInt nl,i; 28 29 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 31 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 32 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 33 for (i=0;i<ctx->benign_n;i++) { 34 PetscScalar sum,val; 35 const PetscInt *idxs; 36 PetscInt nz,j; 37 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 38 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 39 sum = 0.; 40 if (ctx->apply_p0) { 41 val = ctx->work[idxs[nz-1]]; 42 for (j=0;j<nz-1;j++) { 43 sum += ctx->work[idxs[j]]; 44 ctx->work[idxs[j]] += val; 45 } 46 } else { 47 for (j=0;j<nz-1;j++) { 48 sum += ctx->work[idxs[j]]; 49 } 50 } 51 ctx->work[idxs[nz-1]] -= sum; 52 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 53 } 54 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 55 reset_x = PETSC_TRUE; 56 } 57 if (transpose) { 58 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 59 } else { 60 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 61 } 62 if (reset_x) { 63 ierr = VecResetArray(x);CHKERRQ(ierr); 64 } 65 if (apply_left) { 66 PetscScalar *ay; 67 PetscInt i; 68 69 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 70 for (i=0;i<ctx->benign_n;i++) { 71 PetscScalar sum,val; 72 const PetscInt *idxs; 73 PetscInt nz,j; 74 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 75 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 76 val = -ay[idxs[nz-1]]; 77 if (ctx->apply_p0) { 78 sum = 0.; 79 for (j=0;j<nz-1;j++) { 80 sum += ay[idxs[j]]; 81 ay[idxs[j]] += val; 82 } 83 ay[idxs[nz-1]] += sum; 84 } else { 85 for (j=0;j<nz-1;j++) { 86 ay[idxs[j]] += val; 87 } 88 ay[idxs[nz-1]] = 0.; 89 } 90 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 91 } 92 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 99 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 100 { 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 110 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 111 { 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCBenignShellMat" 121 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 122 { 123 PC_IS *pcis = (PC_IS*)pc->data; 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PCBDDCBenignMatMult_ctx ctx; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!restore) { 130 Mat A_IB,A_BI; 131 PetscScalar *work; 132 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 133 134 if (pcbddc->benign_original_mat) { 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 136 } 137 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 138 PetscFunctionReturn(0); 139 } 140 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 141 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 142 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 143 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 146 ierr = PetscNew(&ctx);CHKERRQ(ierr); 147 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 148 ctx->apply_left = PETSC_TRUE; 149 ctx->apply_right = PETSC_FALSE; 150 ctx->apply_p0 = PETSC_FALSE; 151 ctx->benign_n = pcbddc->benign_n; 152 if (reuse) { 153 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 154 ctx->free = PETSC_FALSE; 155 } else { /* TODO: could be optimized for successive solves */ 156 ISLocalToGlobalMapping N_to_D; 157 PetscInt i; 158 159 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 160 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 161 for (i=0;i<pcbddc->benign_n;i++) { 162 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 163 } 164 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 165 ctx->free = PETSC_TRUE; 166 } 167 ctx->A = pcis->A_IB; 168 ctx->work = work; 169 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 170 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 pcis->A_IB = A_IB; 173 174 /* A_BI as A_IB^T */ 175 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 176 pcbddc->benign_original_mat = pcis->A_BI; 177 pcis->A_BI = A_BI; 178 } else { 179 if (!pcbddc->benign_original_mat) { 180 PetscFunctionReturn(0); 181 } 182 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 183 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 184 pcis->A_IB = ctx->A; 185 ctx->A = NULL; 186 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 187 pcis->A_BI = pcbddc->benign_original_mat; 188 pcbddc->benign_original_mat = NULL; 189 if (ctx->free) { 190 PetscInt i; 191 for (i=0;i<ctx->benign_n;i++) { 192 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 193 } 194 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 195 } 196 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 197 ierr = PetscFree(ctx);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* used just in bddc debug mode */ 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignProject" 205 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 206 { 207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 209 Mat An; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 214 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 215 if (is1) { 216 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 217 ierr = MatDestroy(&An);CHKERRQ(ierr); 218 } else { 219 *B = An; 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* TODO: add reuse flag */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSeqAIJCompress" 227 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 228 { 229 Mat Bt; 230 PetscScalar *a,*bdata; 231 const PetscInt *ii,*ij; 232 PetscInt m,n,i,nnz,*bii,*bij; 233 PetscBool flg_row; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 238 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 239 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 240 nnz = n; 241 for (i=0;i<ii[n];i++) { 242 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 243 } 244 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 247 nnz = 0; 248 bii[0] = 0; 249 for (i=0;i<n;i++) { 250 PetscInt j; 251 for (j=ii[i];j<ii[i+1];j++) { 252 PetscScalar entry = a[j]; 253 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 254 bij[nnz] = ij[j]; 255 bdata[nnz] = entry; 256 nnz++; 257 } 258 } 259 bii[i+1] = nnz; 260 } 261 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 262 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 263 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 264 { 265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 } 269 *B = Bt; 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatDetectDisconnectedComponents" 275 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 276 { 277 Mat B; 278 IS is_dummy,*cc_n; 279 ISLocalToGlobalMapping l2gmap_dummy; 280 PCBDDCGraph graph; 281 PetscInt i,n; 282 PetscInt *xadj,*adjncy; 283 PetscInt *xadj_filtered,*adjncy_filtered; 284 PetscBool flg_row,isseqaij; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 if (!A->rmap->N || !A->cmap->N) { 289 *ncc = 0; 290 *cc = NULL; 291 PetscFunctionReturn(0); 292 } 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 294 if (!isseqaij && filter) { 295 PetscBool isseqdense; 296 297 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 298 if (!isseqdense) { 299 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 300 } else { /* TODO: rectangular case and LDA */ 301 PetscScalar *array; 302 PetscReal chop=1.e-6; 303 304 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 305 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 306 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 307 for (i=0;i<n;i++) { 308 PetscInt j; 309 for (j=i+1;j<n;j++) { 310 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 311 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 312 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 313 } 314 } 315 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 316 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 317 } 318 } else { 319 B = A; 320 } 321 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 322 323 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 324 if (filter) { 325 PetscScalar *data; 326 PetscInt j,cum; 327 328 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 329 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 330 cum = 0; 331 for (i=0;i<n;i++) { 332 PetscInt t; 333 334 for (j=xadj[i];j<xadj[i+1];j++) { 335 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 336 continue; 337 } 338 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 339 } 340 t = xadj_filtered[i]; 341 xadj_filtered[i] = cum; 342 cum += t; 343 } 344 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 345 } else { 346 xadj_filtered = NULL; 347 adjncy_filtered = NULL; 348 } 349 350 /* compute local connected components using PCBDDCGraph */ 351 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 353 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 355 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 357 if (xadj_filtered) { 358 graph->xadj = xadj_filtered; 359 graph->adjncy = adjncy_filtered; 360 } else { 361 graph->xadj = xadj; 362 graph->adjncy = adjncy; 363 } 364 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 366 /* partial clean up */ 367 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 368 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 369 if (A != B) { 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 } 372 373 /* get back data */ 374 if (ncc) *ncc = graph->ncc; 375 if (cc) { 376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 377 for (i=0;i<graph->ncc;i++) { 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 379 } 380 *cc = cc_n; 381 } 382 /* clean up graph */ 383 graph->xadj = 0; 384 graph->adjncy = 0; 385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCBenignCheck" 391 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 392 { 393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 394 PC_IS* pcis = (PC_IS*)(pc->data); 395 IS dirIS = NULL; 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 401 if (zerodiag) { 402 Mat A; 403 Vec vec3_N; 404 PetscScalar *vals; 405 const PetscInt *idxs; 406 PetscInt nz,*count; 407 408 /* p0 */ 409 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 410 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 413 for (i=0;i<nz;i++) vals[i] = 1.; 414 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 417 /* v_I */ 418 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 419 for (i=0;i<nz;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 422 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 423 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 424 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 426 if (dirIS) { 427 PetscInt n; 428 429 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 430 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 431 for (i=0;i<n;i++) vals[i] = 0.; 432 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 438 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 439 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 440 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 441 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 442 if (PetscAbsScalar(vals[0]) > 1.e-1) { 443 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 444 } 445 ierr = PetscFree(vals);CHKERRQ(ierr); 446 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 447 448 /* there should not be any pressure dofs lying on the interface */ 449 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 450 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 451 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 452 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 453 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 for (i=0;i<nz;i++) { 455 if (count[idxs[i]]) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 457 } 458 } 459 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 460 ierr = PetscFree(count);CHKERRQ(ierr); 461 } 462 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 463 464 /* check PCBDDCBenignGetOrSetP0 */ 465 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 468 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 469 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 470 for (i=0;i<pcbddc->benign_n;i++) { 471 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 472 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 473 } 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 480 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 481 { 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 IS pressures,zerodiag,*zerodiag_subs; 484 PetscInt nz,n; 485 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 491 for (n=0;n<pcbddc->benign_n;n++) { 492 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 495 pcbddc->benign_n = 0; 496 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 497 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 498 Checks if all the pressure dofs in each subdomain have a zero diagonal 499 If not, a change of basis on pressures is not needed 500 since the local Schur complements are SPD 501 */ 502 has_null_pressures = PETSC_TRUE; 503 have_null = PETSC_TRUE; 504 if (pcbddc->n_ISForDofsLocal) { 505 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 506 507 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 513 if (!sorted) { 514 ierr = ISSort(pressures);CHKERRQ(ierr); 515 } 516 } else { 517 pressures = NULL; 518 } 519 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 520 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 521 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 522 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 523 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 524 if (!sorted) { 525 ierr = ISSort(zerodiag);CHKERRQ(ierr); 526 } 527 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 528 if (!nz) { 529 if (n) have_null = PETSC_FALSE; 530 has_null_pressures = PETSC_FALSE; 531 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 532 } 533 recompute_zerodiag = PETSC_FALSE; 534 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 535 zerodiag_subs = NULL; 536 pcbddc->benign_n = 0; 537 if (has_null_pressures) { 538 IS *subs; 539 PetscInt nsubs,i; 540 541 subs = pcbddc->local_subs; 542 nsubs = pcbddc->n_local_subs; 543 if (nsubs > 1) { 544 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 545 for (i=0;i<nsubs;i++) { 546 ISLocalToGlobalMapping l2g; 547 IS t_zerodiag_subs; 548 PetscInt nl; 549 550 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 551 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 552 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 553 if (nl) { 554 PetscBool valid = PETSC_TRUE; 555 556 if (pressures) { 557 IS t_pressure_subs; 558 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 559 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 560 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 561 } 562 if (valid) { 563 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 564 pcbddc->benign_n++; 565 } else { 566 recompute_zerodiag = PETSC_TRUE; 567 } 568 } 569 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 571 } 572 } else { /* there's just one subdomain (or zero if they have not been detected */ 573 PetscBool valid = PETSC_TRUE; 574 if (pressures) { 575 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 576 } 577 if (valid) { 578 pcbddc->benign_n = 1; 579 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 580 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 581 zerodiag_subs[0] = zerodiag; 582 } 583 } 584 } 585 586 if (!pcbddc->benign_n) { 587 PetscInt n; 588 589 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 590 recompute_zerodiag = PETSC_FALSE; 591 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 592 if (n) { 593 has_null_pressures = PETSC_FALSE; 594 have_null = PETSC_FALSE; 595 } 596 } 597 598 /* final check for null pressures */ 599 if (zerodiag && pressures) { 600 PetscInt nz,np; 601 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 602 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 603 if (nz != np) have_null = PETSC_FALSE; 604 } 605 606 if (recompute_zerodiag) { 607 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 608 if (pcbddc->benign_n == 1) { 609 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 610 zerodiag = zerodiag_subs[0]; 611 } else { 612 PetscInt i,nzn,*new_idxs; 613 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 nzn += ns; 619 } 620 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 621 nzn = 0; 622 for (i=0;i<pcbddc->benign_n;i++) { 623 PetscInt ns,*idxs; 624 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 625 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 626 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 627 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 628 nzn += ns; 629 } 630 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 631 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 632 } 633 have_null = PETSC_FALSE; 634 } 635 636 /* no-net-flux */ 637 if (pcbddc->benign_compute_nonetflux) { 638 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 639 MatNullSpace near_null_space; 640 ISLocalToGlobalMapping rmap; 641 Vec quad_vec; 642 PetscScalar *pvals; 643 PetscInt i,np,*dummyins; 644 645 /* create vector to hold quadrature weights */ 646 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 647 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 648 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 649 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 650 651 /* compute B^{(i)T} * 1_p */ 652 np = 0; 653 if (zerodiag) { 654 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 655 } else if (pressures) { 656 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 657 } 658 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 659 for (i=0;i<np;i++) pvals[i] = 1.; 660 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 661 if (np) { 662 const PetscInt *pidxs; 663 664 if (zerodiag) { 665 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 666 } else if (pressures) { 667 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 668 } 669 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 670 if (zerodiag) { 671 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 672 } else if (pressures) { 673 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 674 } 675 } 676 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 677 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 678 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 679 ierr = PetscFree(pvals);CHKERRQ(ierr); 680 681 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 682 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 683 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 684 for (i=0;i<matis->sf->nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 685 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 686 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 687 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 688 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 689 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 690 for (i=0;i<matis->sf->nleaves;i++) { 691 if (PetscGlobalRank != matis->sf_leafdata[i]) { 692 pvals[i] = 0.; 693 } 694 } 695 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 696 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 697 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 698 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 699 ierr = PetscFree(dummyins);CHKERRQ(ierr); 700 701 /* assembly quadrature vec and attach near null space to pmat */ 702 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 703 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 704 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 705 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 706 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 707 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 708 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 709 } 710 711 /* change of basis and p0 dofs */ 712 if (has_null_pressures) { 713 IS zerodiagc; 714 const PetscInt *idxs,*idxsc; 715 PetscInt i,s,*nnz; 716 717 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 718 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 719 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 720 /* local change of basis for pressures */ 721 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 722 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 723 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 724 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 725 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 726 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 727 for (i=0;i<pcbddc->benign_n;i++) { 728 PetscInt nzs,j; 729 730 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 731 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 732 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 733 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 734 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 735 } 736 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 737 ierr = PetscFree(nnz);CHKERRQ(ierr); 738 /* set identity on velocities */ 739 for (i=0;i<n-nz;i++) { 740 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 741 } 742 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 743 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 744 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 745 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 746 /* set change on pressures */ 747 for (s=0;s<pcbddc->benign_n;s++) { 748 PetscScalar *array; 749 PetscInt nzs; 750 751 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 752 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 753 for (i=0;i<nzs-1;i++) { 754 PetscScalar vals[2]; 755 PetscInt cols[2]; 756 757 cols[0] = idxs[i]; 758 cols[1] = idxs[nzs-1]; 759 vals[0] = 1.; 760 vals[1] = 1.; 761 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 762 } 763 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 764 for (i=0;i<nzs-1;i++) array[i] = -1.; 765 array[nzs-1] = 1.; 766 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 767 /* store local idxs for p0 */ 768 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 769 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 770 ierr = PetscFree(array);CHKERRQ(ierr); 771 } 772 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 /* project if needed */ 775 if (pcbddc->benign_change_explicit) { 776 Mat M; 777 778 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 780 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 781 ierr = MatDestroy(&M);CHKERRQ(ierr); 782 } 783 /* store global idxs for p0 */ 784 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 785 } 786 pcbddc->benign_zerodiag_subs = zerodiag_subs; 787 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 788 789 /* determines if the coarse solver will be singular or not */ 790 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 791 /* determines if the problem has subdomains with 0 pressure block */ 792 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 793 *zerodiaglocal = zerodiag; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 799 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 800 { 801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 802 PetscScalar *array; 803 PetscErrorCode ierr; 804 805 PetscFunctionBegin; 806 if (!pcbddc->benign_sf) { 807 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 808 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 809 } 810 if (get) { 811 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 813 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 814 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 815 } else { 816 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 817 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 818 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 819 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 826 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 827 { 828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 /* TODO: add error checking 833 - avoid nested pop (or push) calls. 834 - cannot push before pop. 835 - cannot call this if pcbddc->local_mat is NULL 836 */ 837 if (!pcbddc->benign_n) { 838 PetscFunctionReturn(0); 839 } 840 if (pop) { 841 if (pcbddc->benign_change_explicit) { 842 IS is_p0; 843 MatReuse reuse; 844 845 /* extract B_0 */ 846 reuse = MAT_INITIAL_MATRIX; 847 if (pcbddc->benign_B0) { 848 reuse = MAT_REUSE_MATRIX; 849 } 850 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 851 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 852 /* remove rows and cols from local problem */ 853 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 854 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 855 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 856 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 857 } else { 858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 859 PetscScalar *vals; 860 PetscInt i,n,*idxs_ins; 861 862 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 863 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 864 if (!pcbddc->benign_B0) { 865 PetscInt *nnz; 866 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 867 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 868 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 869 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 870 for (i=0;i<pcbddc->benign_n;i++) { 871 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 872 nnz[i] = n - nnz[i]; 873 } 874 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 875 ierr = PetscFree(nnz);CHKERRQ(ierr); 876 } 877 878 for (i=0;i<pcbddc->benign_n;i++) { 879 PetscScalar *array; 880 PetscInt *idxs,j,nz,cum; 881 882 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 883 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 884 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 885 for (j=0;j<nz;j++) vals[j] = 1.; 886 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 887 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 888 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 889 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 890 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 891 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 892 cum = 0; 893 for (j=0;j<n;j++) { 894 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 895 vals[cum] = array[j]; 896 idxs_ins[cum] = j; 897 cum++; 898 } 899 } 900 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 901 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 902 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 903 } 904 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 905 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 906 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 907 } 908 } else { /* push */ 909 if (pcbddc->benign_change_explicit) { 910 PetscInt i; 911 912 for (i=0;i<pcbddc->benign_n;i++) { 913 PetscScalar *B0_vals; 914 PetscInt *B0_cols,B0_ncol; 915 916 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 917 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 918 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 919 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 920 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 921 } 922 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 923 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924 } else { 925 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 926 } 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "PCBDDCAdaptiveSelection" 933 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 934 { 935 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 936 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 937 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 938 PetscBLASInt *B_iwork,*B_ifail; 939 PetscScalar *work,lwork; 940 PetscScalar *St,*S,*eigv; 941 PetscScalar *Sarray,*Starray; 942 PetscReal *eigs,thresh; 943 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 944 PetscBool allocated_S_St; 945 #if defined(PETSC_USE_COMPLEX) 946 PetscReal *rwork; 947 #endif 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 952 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 953 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 958 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 if (pcbddc->dbg_flag) { 962 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 963 } 964 965 /* max size of subsets */ 966 mss = 0; 967 for (i=0;i<sub_schurs->n_subs;i++) { 968 PetscInt subset_size; 969 970 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 971 mss = PetscMax(mss,subset_size); 972 } 973 974 /* min/max and threshold */ 975 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 976 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 977 nmax = PetscMax(nmin,nmax); 978 allocated_S_St = PETSC_FALSE; 979 if (nmin) { 980 allocated_S_St = PETSC_TRUE; 981 } 982 983 /* allocate lapack workspace */ 984 cum = cum2 = 0; 985 maxneigs = 0; 986 for (i=0;i<sub_schurs->n_subs;i++) { 987 PetscInt n,subset_size; 988 989 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 990 n = PetscMin(subset_size,nmax); 991 cum += subset_size; 992 cum2 += subset_size*n; 993 maxneigs = PetscMax(maxneigs,n); 994 } 995 if (mss) { 996 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 997 PetscBLASInt B_itype = 1; 998 PetscBLASInt B_N = mss; 999 PetscReal zero = 0.0; 1000 PetscReal eps = 0.0; /* dlamch? */ 1001 1002 B_lwork = -1; 1003 S = NULL; 1004 St = NULL; 1005 eigs = NULL; 1006 eigv = NULL; 1007 B_iwork = NULL; 1008 B_ifail = NULL; 1009 #if defined(PETSC_USE_COMPLEX) 1010 rwork = NULL; 1011 #endif 1012 thresh = 1.0; 1013 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1014 #if defined(PETSC_USE_COMPLEX) 1015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1016 #else 1017 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1018 #endif 1019 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1021 } else { 1022 /* TODO */ 1023 } 1024 } else { 1025 lwork = 0; 1026 } 1027 1028 nv = 0; 1029 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1030 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1031 } 1032 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1033 if (allocated_S_St) { 1034 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1035 } 1036 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1037 #if defined(PETSC_USE_COMPLEX) 1038 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1039 #endif 1040 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1041 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1042 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1043 nv+cum,&pcbddc->adaptive_constraints_idxs, 1044 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1045 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1046 1047 maxneigs = 0; 1048 cum = cumarray = 0; 1049 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1050 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1051 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1052 const PetscInt *idxs; 1053 1054 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1055 for (cum=0;cum<nv;cum++) { 1056 pcbddc->adaptive_constraints_n[cum] = 1; 1057 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1058 pcbddc->adaptive_constraints_data[cum] = 1.0; 1059 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1060 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1061 } 1062 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1063 } 1064 1065 if (mss) { /* multilevel */ 1066 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1067 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1068 } 1069 1070 thresh = pcbddc->adaptive_threshold; 1071 for (i=0;i<sub_schurs->n_subs;i++) { 1072 const PetscInt *idxs; 1073 PetscReal upper,lower; 1074 PetscInt j,subset_size,eigs_start = 0; 1075 PetscBLASInt B_N; 1076 PetscBool same_data = PETSC_FALSE; 1077 1078 if (pcbddc->use_deluxe_scaling) { 1079 upper = PETSC_MAX_REAL; 1080 lower = thresh; 1081 } else { 1082 upper = 1./thresh; 1083 lower = 0.; 1084 } 1085 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1086 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1087 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1088 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1089 if (sub_schurs->is_hermitian) { 1090 PetscInt j,k; 1091 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1092 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1093 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 } 1095 for (j=0;j<subset_size;j++) { 1096 for (k=j;k<subset_size;k++) { 1097 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1098 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1099 } 1100 } 1101 } else { 1102 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1103 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1104 } 1105 } else { 1106 S = Sarray + cumarray; 1107 St = Starray + cumarray; 1108 } 1109 /* see if we can save some work */ 1110 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1111 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1112 } 1113 1114 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1115 B_neigs = 0; 1116 } else { 1117 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1118 PetscBLASInt B_itype = 1; 1119 PetscBLASInt B_IL, B_IU; 1120 PetscReal eps = -1.0; /* dlamch? */ 1121 PetscInt nmin_s; 1122 PetscBool compute_range = PETSC_FALSE; 1123 1124 if (pcbddc->dbg_flag) { 1125 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1126 } 1127 1128 compute_range = PETSC_FALSE; 1129 if (thresh > 1.+PETSC_SMALL && !same_data) { 1130 compute_range = PETSC_TRUE; 1131 } 1132 1133 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1134 if (compute_range) { 1135 1136 /* ask for eigenvalues larger than thresh */ 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1139 #else 1140 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1141 #endif 1142 } else if (!same_data) { 1143 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1144 B_IL = 1; 1145 #if defined(PETSC_USE_COMPLEX) 1146 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1147 #else 1148 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1149 #endif 1150 } else { /* same_data is true, so get the adaptive function requested by the user */ 1151 PetscInt k; 1152 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1153 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1154 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1155 nmin = nmax; 1156 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1157 for (k=0;k<nmax;k++) { 1158 eigs[k] = 1./PETSC_SMALL; 1159 eigv[k*(subset_size+1)] = 1.0; 1160 } 1161 } 1162 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1163 if (B_ierr) { 1164 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1165 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1166 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1167 } 1168 1169 if (B_neigs > nmax) { 1170 if (pcbddc->dbg_flag) { 1171 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1172 } 1173 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1174 B_neigs = nmax; 1175 } 1176 1177 nmin_s = PetscMin(nmin,B_N); 1178 if (B_neigs < nmin_s) { 1179 PetscBLASInt B_neigs2; 1180 1181 if (pcbddc->use_deluxe_scaling) { 1182 B_IL = B_N - nmin_s + 1; 1183 B_IU = B_N - B_neigs; 1184 } else { 1185 B_IL = B_neigs + 1; 1186 B_IU = nmin_s; 1187 } 1188 if (pcbddc->dbg_flag) { 1189 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1190 } 1191 if (sub_schurs->is_hermitian) { 1192 PetscInt j; 1193 for (j=0;j<subset_size;j++) { 1194 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1195 } 1196 for (j=0;j<subset_size;j++) { 1197 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1198 } 1199 } else { 1200 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1201 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1202 } 1203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_COMPLEX) 1205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1206 #else 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1208 #endif 1209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1210 B_neigs += B_neigs2; 1211 } 1212 if (B_ierr) { 1213 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1214 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1215 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1216 } 1217 if (pcbddc->dbg_flag) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1219 for (j=0;j<B_neigs;j++) { 1220 if (eigs[j] == 0.0) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1222 } else { 1223 if (pcbddc->use_deluxe_scaling) { 1224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1225 } else { 1226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 } else { 1232 /* TODO */ 1233 } 1234 } 1235 /* change the basis back to the original one */ 1236 if (sub_schurs->change) { 1237 Mat change,phi,phit; 1238 1239 if (pcbddc->dbg_flag > 1) { 1240 PetscInt ii; 1241 for (ii=0;ii<B_neigs;ii++) { 1242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1243 for (j=0;j<B_N;j++) { 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1245 } 1246 } 1247 } 1248 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1250 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1251 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1252 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1253 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1254 } 1255 maxneigs = PetscMax(B_neigs,maxneigs); 1256 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1257 if (B_neigs) { 1258 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1259 1260 if (pcbddc->dbg_flag > 1) { 1261 PetscInt ii; 1262 for (ii=0;ii<B_neigs;ii++) { 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1264 for (j=0;j<B_N;j++) { 1265 #if defined(PETSC_USE_COMPLEX) 1266 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1267 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1269 #else 1270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1271 #endif 1272 } 1273 } 1274 } 1275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1276 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1277 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1278 cum++; 1279 } 1280 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1281 /* shift for next computation */ 1282 cumarray += subset_size*subset_size; 1283 } 1284 if (pcbddc->dbg_flag) { 1285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1286 } 1287 1288 if (mss) { 1289 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1290 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1291 /* destroy matrices (junk) */ 1292 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1293 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1294 } 1295 if (allocated_S_St) { 1296 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1299 #if defined(PETSC_USE_COMPLEX) 1300 ierr = PetscFree(rwork);CHKERRQ(ierr); 1301 #endif 1302 if (pcbddc->dbg_flag) { 1303 PetscInt maxneigs_r; 1304 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "PCBDDCSetUpSolvers" 1312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1313 { 1314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1315 PetscScalar *coarse_submat_vals; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1322 1323 /* Setup local neumann solver ksp_R */ 1324 /* PCBDDCSetUpLocalScatters should be called first! */ 1325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1326 1327 /* Change global null space passed in by the user if change of basis has been requested */ 1328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1330 } 1331 1332 /* 1333 Setup local correction and local part of coarse basis. 1334 Gives back the dense local part of the coarse matrix in column major ordering 1335 */ 1336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1337 1338 /* Compute total number of coarse nodes and setup coarse solver */ 1339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1340 1341 /* free */ 1342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "PCBDDCResetCustomization" 1348 PetscErrorCode PCBDDCResetCustomization(PC pc) 1349 { 1350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1357 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1358 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1359 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1360 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1361 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1362 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1363 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1364 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1365 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "PCBDDCResetTopography" 1371 PetscErrorCode PCBDDCResetTopography(PC pc) 1372 { 1373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1374 PetscInt i; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1380 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1381 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1382 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1384 for (i=0;i<pcbddc->n_local_subs;i++) { 1385 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1388 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "PCBDDCResetSolvers" 1394 PetscErrorCode PCBDDCResetSolvers(PC pc) 1395 { 1396 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1401 if (pcbddc->coarse_phi_B) { 1402 PetscScalar *array; 1403 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1404 ierr = PetscFree(array);CHKERRQ(ierr); 1405 } 1406 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1407 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1410 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1412 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1413 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1414 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1415 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1416 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1417 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1418 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1419 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1420 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1421 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1422 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1423 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1424 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1425 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1426 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1427 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1429 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1430 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1431 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1432 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1433 if (pcbddc->benign_zerodiag_subs) { 1434 PetscInt i; 1435 for (i=0;i<pcbddc->benign_n;i++) { 1436 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1437 } 1438 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1439 } 1440 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1446 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1447 { 1448 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1449 PC_IS *pcis = (PC_IS*)pc->data; 1450 VecType impVecType; 1451 PetscInt n_constraints,n_R,old_size; 1452 PetscErrorCode ierr; 1453 1454 PetscFunctionBegin; 1455 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1456 /* get sizes */ 1457 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1458 n_R = pcis->n - pcbddc->n_vertices; 1459 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1460 /* local work vectors (try to avoid unneeded work)*/ 1461 /* R nodes */ 1462 old_size = -1; 1463 if (pcbddc->vec1_R) { 1464 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1465 } 1466 if (n_R != old_size) { 1467 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1468 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1469 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1470 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1471 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1472 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1473 } 1474 /* local primal dofs */ 1475 old_size = -1; 1476 if (pcbddc->vec1_P) { 1477 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1478 } 1479 if (pcbddc->local_primal_size != old_size) { 1480 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1481 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1482 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1483 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1484 } 1485 /* local explicit constraints */ 1486 old_size = -1; 1487 if (pcbddc->vec1_C) { 1488 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1489 } 1490 if (n_constraints && n_constraints != old_size) { 1491 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1493 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1494 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "PCBDDCSetUpCorrection" 1501 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1502 { 1503 PetscErrorCode ierr; 1504 /* pointers to pcis and pcbddc */ 1505 PC_IS* pcis = (PC_IS*)pc->data; 1506 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1507 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1508 /* submatrices of local problem */ 1509 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1510 /* submatrices of local coarse problem */ 1511 Mat S_VV,S_CV,S_VC,S_CC; 1512 /* working matrices */ 1513 Mat C_CR; 1514 /* additional working stuff */ 1515 PC pc_R; 1516 Mat F; 1517 Vec dummy_vec; 1518 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1519 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1520 PetscScalar *work; 1521 PetscInt *idx_V_B; 1522 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1523 PetscInt i,n_R,n_D,n_B; 1524 1525 /* some shortcuts to scalars */ 1526 PetscScalar one=1.0,m_one=-1.0; 1527 1528 PetscFunctionBegin; 1529 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1531 } 1532 1533 /* Set Non-overlapping dimensions */ 1534 n_vertices = pcbddc->n_vertices; 1535 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1536 n_B = pcis->n_B; 1537 n_D = pcis->n - n_B; 1538 n_R = pcis->n - n_vertices; 1539 1540 /* vertices in boundary numbering */ 1541 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1542 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1543 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1544 1545 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1546 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1548 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1550 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1552 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1554 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1555 1556 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1557 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1560 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1561 lda_rhs = n_R; 1562 need_benign_correction = PETSC_FALSE; 1563 if (isLU || isILU || isCHOL) { 1564 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1565 } else if (sub_schurs->reuse_solver) { 1566 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1567 MatFactorType type; 1568 1569 F = reuse_solver->F; 1570 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1571 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1572 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1573 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1574 } else { 1575 F = NULL; 1576 } 1577 1578 /* allocate workspace */ 1579 n = 0; 1580 if (n_constraints) { 1581 n += lda_rhs*n_constraints; 1582 } 1583 if (n_vertices) { 1584 n = PetscMax(2*lda_rhs*n_vertices,n); 1585 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1586 } 1587 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1588 1589 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1590 dummy_vec = NULL; 1591 if (need_benign_correction && lda_rhs != n_R && F) { 1592 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1593 } 1594 1595 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1596 if (n_constraints) { 1597 Mat M1,M2,M3,C_B; 1598 IS is_aux; 1599 PetscScalar *array,*array2; 1600 1601 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1602 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1603 1604 /* Extract constraints on R nodes: C_{CR} */ 1605 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1607 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1608 1609 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1610 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1611 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1612 for (i=0;i<n_constraints;i++) { 1613 const PetscScalar *row_cmat_values; 1614 const PetscInt *row_cmat_indices; 1615 PetscInt size_of_constraint,j; 1616 1617 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1618 for (j=0;j<size_of_constraint;j++) { 1619 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1620 } 1621 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1622 } 1623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1624 if (F) { 1625 Mat B; 1626 1627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1628 if (need_benign_correction) { 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 /* rhs is already zero on interior dofs, no need to change the rhs */ 1632 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1633 } 1634 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1635 if (need_benign_correction) { 1636 PetscScalar *marr; 1637 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1638 1639 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1640 if (lda_rhs != n_R) { 1641 for (i=0;i<n_constraints;i++) { 1642 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1643 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1644 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1645 } 1646 } else { 1647 for (i=0;i<n_constraints;i++) { 1648 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1649 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1651 } 1652 } 1653 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1654 } 1655 ierr = MatDestroy(&B);CHKERRQ(ierr); 1656 } else { 1657 PetscScalar *marr; 1658 1659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 for (i=0;i<n_constraints;i++) { 1661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1666 } 1667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1668 } 1669 if (!pcbddc->switch_static) { 1670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1673 for (i=0;i<n_constraints;i++) { 1674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1680 } 1681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1683 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1684 } else { 1685 if (lda_rhs != n_R) { 1686 IS dummy; 1687 1688 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1689 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1690 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1691 } else { 1692 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1693 pcbddc->local_auxmat2 = local_auxmat2_R; 1694 } 1695 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1696 } 1697 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1698 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1699 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1700 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1701 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1702 if (isCHOL) { 1703 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1704 } else { 1705 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1706 } 1707 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1708 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1709 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1710 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1711 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1712 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1713 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1714 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1715 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1716 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1717 } 1718 1719 /* Get submatrices from subdomain matrix */ 1720 if (n_vertices) { 1721 IS is_aux; 1722 1723 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1724 IS tis; 1725 1726 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1727 ierr = ISSort(tis);CHKERRQ(ierr); 1728 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1730 } else { 1731 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1732 } 1733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1735 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1736 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1737 } 1738 1739 /* Matrix of coarse basis functions (local) */ 1740 if (pcbddc->coarse_phi_B) { 1741 PetscInt on_B,on_primal,on_D=n_D; 1742 if (pcbddc->coarse_phi_D) { 1743 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1744 } 1745 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1746 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1747 PetscScalar *marray; 1748 1749 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1750 ierr = PetscFree(marray);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1752 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1753 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1754 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1755 } 1756 } 1757 1758 if (!pcbddc->coarse_phi_B) { 1759 PetscScalar *marray; 1760 1761 n = n_B*pcbddc->local_primal_size; 1762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1763 n += n_D*pcbddc->local_primal_size; 1764 } 1765 if (!pcbddc->symmetric_primal) { 1766 n *= 2; 1767 } 1768 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1770 n = n_B*pcbddc->local_primal_size; 1771 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1773 n += n_D*pcbddc->local_primal_size; 1774 } 1775 if (!pcbddc->symmetric_primal) { 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1777 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1778 n = n_B*pcbddc->local_primal_size; 1779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1780 } 1781 } else { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1784 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1786 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1787 } 1788 } 1789 } 1790 1791 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1792 p0_lidx_I = NULL; 1793 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1794 const PetscInt *idxs; 1795 1796 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1797 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1798 for (i=0;i<pcbddc->benign_n;i++) { 1799 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1800 } 1801 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1802 } 1803 1804 /* vertices */ 1805 if (n_vertices) { 1806 1807 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1808 1809 if (n_R) { 1810 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1811 PetscBLASInt B_N,B_one = 1; 1812 PetscScalar *x,*y; 1813 PetscBool isseqaij; 1814 1815 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1816 if (need_benign_correction) { 1817 ISLocalToGlobalMapping RtoN; 1818 IS is_p0; 1819 PetscInt *idxs_p0,n; 1820 1821 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1822 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1823 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1824 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1825 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1826 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1827 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1828 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1829 } 1830 1831 if (lda_rhs == n_R) { 1832 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1833 } else { 1834 PetscScalar *av,*array; 1835 const PetscInt *xadj,*adjncy; 1836 PetscInt n; 1837 PetscBool flg_row; 1838 1839 array = work+lda_rhs*n_vertices; 1840 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1841 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1842 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1844 for (i=0;i<n;i++) { 1845 PetscInt j; 1846 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1847 } 1848 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1849 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1851 } 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1853 if (need_benign_correction) { 1854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1855 PetscScalar *marr; 1856 1857 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1858 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1859 1860 | 0 0 0 | (V) 1861 L = | 0 0 -1 | (P-p0) 1862 | 0 0 -1 | (p0) 1863 1864 */ 1865 for (i=0;i<reuse_solver->benign_n;i++) { 1866 const PetscScalar *vals; 1867 const PetscInt *idxs,*idxs_zero; 1868 PetscInt n,j,nz; 1869 1870 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1871 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1872 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 for (j=0;j<n;j++) { 1874 PetscScalar val = vals[j]; 1875 PetscInt k,col = idxs[j]; 1876 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1877 } 1878 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1879 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1880 } 1881 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1882 } 1883 if (F) { 1884 /* need to correct the rhs */ 1885 if (need_benign_correction) { 1886 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1887 PetscScalar *marr; 1888 1889 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1890 if (lda_rhs != n_R) { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1895 } 1896 } else { 1897 for (i=0;i<n_vertices;i++) { 1898 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1899 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1901 } 1902 } 1903 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1904 } 1905 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1906 /* need to correct the solution */ 1907 if (need_benign_correction) { 1908 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1909 PetscScalar *marr; 1910 1911 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1912 if (lda_rhs != n_R) { 1913 for (i=0;i<n_vertices;i++) { 1914 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1915 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1917 } 1918 } else { 1919 for (i=0;i<n_vertices;i++) { 1920 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1921 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1923 } 1924 } 1925 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1926 } 1927 } else { 1928 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1929 for (i=0;i<n_vertices;i++) { 1930 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1931 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1932 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1934 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1935 } 1936 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1937 } 1938 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1939 /* S_VV and S_CV */ 1940 if (n_constraints) { 1941 Mat B; 1942 1943 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1944 for (i=0;i<n_vertices;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1947 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1948 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1951 } 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1956 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1957 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1960 ierr = MatDestroy(&B);CHKERRQ(ierr); 1961 } 1962 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1963 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1964 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1965 } 1966 if (lda_rhs != n_R) { 1967 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1969 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1970 } 1971 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1972 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1973 if (need_benign_correction) { 1974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1975 PetscScalar *marr,*sums; 1976 1977 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1978 ierr = MatDenseGetArray(S_VVt,&marr); 1979 for (i=0;i<reuse_solver->benign_n;i++) { 1980 const PetscScalar *vals; 1981 const PetscInt *idxs,*idxs_zero; 1982 PetscInt n,j,nz; 1983 1984 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1985 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1986 for (j=0;j<n_vertices;j++) { 1987 PetscInt k; 1988 sums[j] = 0.; 1989 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1990 } 1991 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1992 for (j=0;j<n;j++) { 1993 PetscScalar val = vals[j]; 1994 PetscInt k; 1995 for (k=0;k<n_vertices;k++) { 1996 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1997 } 1998 } 1999 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2000 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2001 } 2002 ierr = PetscFree(sums);CHKERRQ(ierr); 2003 ierr = MatDenseRestoreArray(S_VVt,&marr); 2004 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2005 } 2006 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2008 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2011 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2012 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2013 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2015 } else { 2016 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2017 } 2018 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2019 2020 /* coarse basis functions */ 2021 for (i=0;i<n_vertices;i++) { 2022 PetscScalar *y; 2023 2024 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2025 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2026 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2027 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2028 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2029 y[n_B*i+idx_V_B[i]] = 1.0; 2030 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2031 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2032 2033 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2034 PetscInt j; 2035 2036 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2037 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2038 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2039 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2040 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2041 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2042 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2043 } 2044 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2045 } 2046 /* if n_R == 0 the object is not destroyed */ 2047 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2048 } 2049 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2050 2051 if (n_constraints) { 2052 Mat B; 2053 2054 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2055 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2056 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2057 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2058 if (n_vertices) { 2059 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2060 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2061 } else { 2062 Mat S_VCt; 2063 2064 if (lda_rhs != n_R) { 2065 ierr = MatDestroy(&B);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2067 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2068 } 2069 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2070 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2071 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2072 } 2073 } 2074 ierr = MatDestroy(&B);CHKERRQ(ierr); 2075 /* coarse basis functions */ 2076 for (i=0;i<n_constraints;i++) { 2077 PetscScalar *y; 2078 2079 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2086 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2087 PetscInt j; 2088 2089 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2090 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2091 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2092 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2094 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2095 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2096 } 2097 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2098 } 2099 } 2100 if (n_constraints) { 2101 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2102 } 2103 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2104 2105 /* coarse matrix entries relative to B_0 */ 2106 if (pcbddc->benign_n) { 2107 Mat B0_B,B0_BPHI; 2108 IS is_dummy; 2109 PetscScalar *data; 2110 PetscInt j; 2111 2112 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2113 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2114 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2115 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2116 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2117 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2118 for (j=0;j<pcbddc->benign_n;j++) { 2119 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2120 for (i=0;i<pcbddc->local_primal_size;i++) { 2121 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2122 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2123 } 2124 } 2125 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2126 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2127 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2128 } 2129 2130 /* compute other basis functions for non-symmetric problems */ 2131 if (!pcbddc->symmetric_primal) { 2132 Mat B_V=NULL,B_C=NULL; 2133 PetscScalar *marray; 2134 2135 if (n_constraints) { 2136 Mat S_CCT,C_CRT; 2137 2138 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2139 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2140 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2141 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2142 if (n_vertices) { 2143 Mat S_VCT; 2144 2145 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2146 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2147 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2148 } 2149 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2150 } 2151 if (n_vertices && n_R) { 2152 PetscScalar *av,*marray; 2153 const PetscInt *xadj,*adjncy; 2154 PetscInt n; 2155 PetscBool flg_row; 2156 2157 /* B_V = B_V - A_VR^T */ 2158 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2159 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2161 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2162 for (i=0;i<n;i++) { 2163 PetscInt j; 2164 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2165 } 2166 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2167 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2168 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2169 } 2170 2171 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2172 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2173 for (i=0;i<n_vertices;i++) { 2174 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2175 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2176 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2177 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2179 } 2180 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2181 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2182 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2183 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2185 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2186 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2188 } 2189 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2190 /* coarse basis functions */ 2191 for (i=0;i<pcbddc->local_primal_size;i++) { 2192 PetscScalar *y; 2193 2194 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2195 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2196 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2197 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2198 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2199 if (i<n_vertices) { 2200 y[n_B*i+idx_V_B[i]] = 1.0; 2201 } 2202 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2204 2205 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2206 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2207 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2208 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2209 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2210 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2211 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2212 } 2213 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2214 } 2215 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2216 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2217 } 2218 /* free memory */ 2219 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2221 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2222 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2223 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2224 ierr = PetscFree(work);CHKERRQ(ierr); 2225 if (n_vertices) { 2226 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2227 } 2228 if (n_constraints) { 2229 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2230 } 2231 /* Checking coarse_sub_mat and coarse basis functios */ 2232 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2233 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2234 if (pcbddc->dbg_flag) { 2235 Mat coarse_sub_mat; 2236 Mat AUXMAT,TM1,TM2,TM3,TM4; 2237 Mat coarse_phi_D,coarse_phi_B; 2238 Mat coarse_psi_D,coarse_psi_B; 2239 Mat A_II,A_BB,A_IB,A_BI; 2240 Mat C_B,CPHI; 2241 IS is_dummy; 2242 Vec mones; 2243 MatType checkmattype=MATSEQAIJ; 2244 PetscReal real_value; 2245 2246 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2247 Mat A; 2248 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2250 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2251 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2252 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2253 ierr = MatDestroy(&A);CHKERRQ(ierr); 2254 } else { 2255 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2256 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2257 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2258 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2259 } 2260 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2262 if (!pcbddc->symmetric_primal) { 2263 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2264 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2265 } 2266 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2267 2268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2271 if (!pcbddc->symmetric_primal) { 2272 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2283 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2284 } else { 2285 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2286 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2291 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2292 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2293 } 2294 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2295 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2296 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2297 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2298 if (pcbddc->benign_n) { 2299 Mat B0_B,B0_BPHI; 2300 PetscScalar *data,*data2; 2301 PetscInt j; 2302 2303 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2304 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2305 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2306 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2307 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2308 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2309 for (j=0;j<pcbddc->benign_n;j++) { 2310 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2311 for (i=0;i<pcbddc->local_primal_size;i++) { 2312 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2313 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2314 } 2315 } 2316 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2317 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2318 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2319 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2320 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2321 } 2322 #if 0 2323 { 2324 PetscViewer viewer; 2325 char filename[256]; 2326 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2327 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2328 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2329 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2330 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2331 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2332 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2333 if (save_change) { 2334 Mat phi_B; 2335 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2337 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2338 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2339 } else { 2340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2342 } 2343 if (pcbddc->coarse_phi_D) { 2344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2346 } 2347 if (pcbddc->coarse_psi_B) { 2348 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2349 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2350 } 2351 if (pcbddc->coarse_psi_D) { 2352 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2353 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2354 } 2355 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2356 } 2357 #endif 2358 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2359 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2360 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2362 2363 /* check constraints */ 2364 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2365 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2366 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2367 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2368 } else { 2369 PetscScalar *data; 2370 Mat tmat; 2371 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2372 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2373 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2374 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2375 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2376 } 2377 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2378 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2379 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2380 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2382 if (!pcbddc->symmetric_primal) { 2383 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2384 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2385 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2386 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2388 } 2389 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2390 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2391 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2392 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2395 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2396 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2397 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2399 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2400 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2401 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2402 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2404 if (!pcbddc->symmetric_primal) { 2405 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2406 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2407 } 2408 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2409 } 2410 /* get back data */ 2411 *coarse_submat_vals_n = coarse_submat_vals; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2417 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2418 { 2419 Mat *work_mat; 2420 IS isrow_s,iscol_s; 2421 PetscBool rsorted,csorted; 2422 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2427 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2428 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2429 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2430 2431 if (!rsorted) { 2432 const PetscInt *idxs; 2433 PetscInt *idxs_sorted,i; 2434 2435 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2436 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2437 for (i=0;i<rsize;i++) { 2438 idxs_perm_r[i] = i; 2439 } 2440 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2441 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2442 for (i=0;i<rsize;i++) { 2443 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2444 } 2445 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2447 } else { 2448 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2449 isrow_s = isrow; 2450 } 2451 2452 if (!csorted) { 2453 if (isrow == iscol) { 2454 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2455 iscol_s = isrow_s; 2456 } else { 2457 const PetscInt *idxs; 2458 PetscInt *idxs_sorted,i; 2459 2460 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2461 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2462 for (i=0;i<csize;i++) { 2463 idxs_perm_c[i] = i; 2464 } 2465 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2466 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2467 for (i=0;i<csize;i++) { 2468 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2469 } 2470 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2471 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2472 } 2473 } else { 2474 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2475 iscol_s = iscol; 2476 } 2477 2478 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2479 2480 if (!rsorted || !csorted) { 2481 Mat new_mat; 2482 IS is_perm_r,is_perm_c; 2483 2484 if (!rsorted) { 2485 PetscInt *idxs_r,i; 2486 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2487 for (i=0;i<rsize;i++) { 2488 idxs_r[idxs_perm_r[i]] = i; 2489 } 2490 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2491 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2492 } else { 2493 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2494 } 2495 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2496 2497 if (!csorted) { 2498 if (isrow_s == iscol_s) { 2499 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2500 is_perm_c = is_perm_r; 2501 } else { 2502 PetscInt *idxs_c,i; 2503 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2504 for (i=0;i<csize;i++) { 2505 idxs_c[idxs_perm_c[i]] = i; 2506 } 2507 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2508 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 } else { 2511 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2512 } 2513 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2514 2515 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2516 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2517 work_mat[0] = new_mat; 2518 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2519 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2520 } 2521 2522 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2523 *B = work_mat[0]; 2524 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2525 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2526 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2532 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2533 { 2534 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 Mat new_mat; 2537 IS is_local,is_global; 2538 PetscInt local_size; 2539 PetscBool isseqaij; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2544 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2545 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2546 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2547 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2548 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2549 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2550 2551 /* check */ 2552 if (pcbddc->dbg_flag) { 2553 Vec x,x_change; 2554 PetscReal error; 2555 2556 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2557 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2558 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2561 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2562 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2563 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2564 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2565 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2568 ierr = VecDestroy(&x);CHKERRQ(ierr); 2569 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2570 } 2571 2572 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2573 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2574 if (isseqaij) { 2575 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2576 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2577 } else { 2578 Mat work_mat; 2579 2580 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2581 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2582 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2583 } 2584 if (matis->A->symmetric_set) { 2585 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2586 #if !defined(PETSC_USE_COMPLEX) 2587 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2588 #endif 2589 } 2590 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2596 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2597 { 2598 PC_IS* pcis = (PC_IS*)(pc->data); 2599 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2600 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2601 PetscInt *idx_R_local=NULL; 2602 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2603 PetscInt vbs,bs; 2604 PetscBT bitmask=NULL; 2605 PetscErrorCode ierr; 2606 2607 PetscFunctionBegin; 2608 /* 2609 No need to setup local scatters if 2610 - primal space is unchanged 2611 AND 2612 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2613 AND 2614 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2615 */ 2616 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2617 PetscFunctionReturn(0); 2618 } 2619 /* destroy old objects */ 2620 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2621 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2622 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2623 /* Set Non-overlapping dimensions */ 2624 n_B = pcis->n_B; 2625 n_D = pcis->n - n_B; 2626 n_vertices = pcbddc->n_vertices; 2627 2628 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2629 2630 /* create auxiliary bitmask and allocate workspace */ 2631 if (!sub_schurs->reuse_solver) { 2632 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2633 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2634 for (i=0;i<n_vertices;i++) { 2635 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2636 } 2637 2638 for (i=0, n_R=0; i<pcis->n; i++) { 2639 if (!PetscBTLookup(bitmask,i)) { 2640 idx_R_local[n_R++] = i; 2641 } 2642 } 2643 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2644 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2645 2646 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2647 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2648 } 2649 2650 /* Block code */ 2651 vbs = 1; 2652 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2653 if (bs>1 && !(n_vertices%bs)) { 2654 PetscBool is_blocked = PETSC_TRUE; 2655 PetscInt *vary; 2656 if (!sub_schurs->reuse_solver) { 2657 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2658 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2659 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2660 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2661 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2662 for (i=0; i<pcis->n/bs; i++) { 2663 if (vary[i]!=0 && vary[i]!=bs) { 2664 is_blocked = PETSC_FALSE; 2665 break; 2666 } 2667 } 2668 ierr = PetscFree(vary);CHKERRQ(ierr); 2669 } else { 2670 /* Verify directly the R set */ 2671 for (i=0; i<n_R/bs; i++) { 2672 PetscInt j,node=idx_R_local[bs*i]; 2673 for (j=1; j<bs; j++) { 2674 if (node != idx_R_local[bs*i+j]-j) { 2675 is_blocked = PETSC_FALSE; 2676 break; 2677 } 2678 } 2679 } 2680 } 2681 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2682 vbs = bs; 2683 for (i=0;i<n_R/vbs;i++) { 2684 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2685 } 2686 } 2687 } 2688 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2689 if (sub_schurs->reuse_solver) { 2690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2691 2692 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2693 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2694 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2695 reuse_solver->is_R = pcbddc->is_R_local; 2696 } else { 2697 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2698 } 2699 2700 /* print some info if requested */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2709 } 2710 2711 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2712 if (!sub_schurs->reuse_solver) { 2713 IS is_aux1,is_aux2; 2714 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2715 2716 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2717 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2719 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2720 for (i=0; i<n_D; i++) { 2721 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2722 } 2723 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2724 for (i=0, j=0; i<n_R; i++) { 2725 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2726 aux_array1[j++] = i; 2727 } 2728 } 2729 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2730 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2731 for (i=0, j=0; i<n_B; i++) { 2732 if (!PetscBTLookup(bitmask,is_indices[i])) { 2733 aux_array2[j++] = i; 2734 } 2735 } 2736 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2738 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2739 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2740 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2741 2742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2743 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2744 for (i=0, j=0; i<n_R; i++) { 2745 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2746 aux_array1[j++] = i; 2747 } 2748 } 2749 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2750 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2752 } 2753 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2754 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2755 } else { 2756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2757 IS tis; 2758 PetscInt schur_size; 2759 2760 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2761 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2762 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2764 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2765 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2766 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2767 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2768 } 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2776 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2777 { 2778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2779 PC_IS *pcis = (PC_IS*)pc->data; 2780 PC pc_temp; 2781 Mat A_RR; 2782 MatReuse reuse; 2783 PetscScalar m_one = -1.0; 2784 PetscReal value; 2785 PetscInt n_D,n_R; 2786 PetscBool use_exact,use_exact_reduced,issbaij; 2787 PetscErrorCode ierr; 2788 /* prefixes stuff */ 2789 char dir_prefix[256],neu_prefix[256],str_level[16]; 2790 size_t len; 2791 2792 PetscFunctionBegin; 2793 2794 /* compute prefixes */ 2795 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2797 if (!pcbddc->current_level) { 2798 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2799 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2800 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2801 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2802 } else { 2803 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2804 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2805 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2806 len -= 15; /* remove "pc_bddc_coarse_" */ 2807 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2808 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2809 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2810 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2811 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2812 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2813 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2814 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2815 } 2816 2817 /* DIRICHLET PROBLEM */ 2818 if (dirichlet) { 2819 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2820 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2821 if (!sub_schurs->reuse_solver) { 2822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2823 } 2824 if (pcbddc->dbg_flag) { 2825 Mat A_IIn; 2826 2827 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2828 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2829 pcis->A_II = A_IIn; 2830 } 2831 } 2832 if (pcbddc->local_mat->symmetric_set) { 2833 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2834 } 2835 /* Matrix for Dirichlet problem is pcis->A_II */ 2836 n_D = pcis->n - pcis->n_B; 2837 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2838 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2839 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2840 /* default */ 2841 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2842 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2843 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2844 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2845 if (issbaij) { 2846 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2847 } else { 2848 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2849 } 2850 /* Allow user's customization */ 2851 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2852 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2853 } 2854 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2855 if (sub_schurs->reuse_solver) { 2856 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2857 2858 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2859 } 2860 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2861 if (!n_D) { 2862 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2863 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2864 } 2865 /* Set Up KSP for Dirichlet problem of BDDC */ 2866 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2867 /* set ksp_D into pcis data */ 2868 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2869 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2870 pcis->ksp_D = pcbddc->ksp_D; 2871 } 2872 2873 /* NEUMANN PROBLEM */ 2874 A_RR = 0; 2875 if (neumann) { 2876 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2877 PetscInt ibs,mbs; 2878 PetscBool issbaij; 2879 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2880 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2881 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2882 if (pcbddc->ksp_R) { /* already created ksp */ 2883 PetscInt nn_R; 2884 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2885 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2886 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2887 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2888 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2889 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2890 reuse = MAT_INITIAL_MATRIX; 2891 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2892 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2893 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2894 reuse = MAT_INITIAL_MATRIX; 2895 } else { /* safe to reuse the matrix */ 2896 reuse = MAT_REUSE_MATRIX; 2897 } 2898 } 2899 /* last check */ 2900 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2901 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 } else { /* first time, so we need to create the matrix */ 2905 reuse = MAT_INITIAL_MATRIX; 2906 } 2907 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2908 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2909 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2910 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2911 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2912 if (matis->A == pcbddc->local_mat) { 2913 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2914 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2915 } else { 2916 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2917 } 2918 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2919 if (matis->A == pcbddc->local_mat) { 2920 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2921 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2922 } else { 2923 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2924 } 2925 } 2926 /* extract A_RR */ 2927 if (sub_schurs->reuse_solver) { 2928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2929 2930 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2931 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2932 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2933 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2934 } else { 2935 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2936 } 2937 } else { 2938 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2939 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2940 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2941 } 2942 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2943 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2944 } 2945 if (pcbddc->local_mat->symmetric_set) { 2946 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2947 } 2948 if (!pcbddc->ksp_R) { /* create object if not present */ 2949 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2950 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2951 /* default */ 2952 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2953 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2954 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2955 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2956 if (issbaij) { 2957 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2958 } else { 2959 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2960 } 2961 /* Allow user's customization */ 2962 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2963 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2964 } 2965 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2966 if (!n_R) { 2967 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2968 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2969 } 2970 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2971 /* Reuse solver if it is present */ 2972 if (sub_schurs->reuse_solver) { 2973 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2974 2975 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2976 } 2977 /* Set Up KSP for Neumann problem of BDDC */ 2978 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2979 } 2980 2981 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2982 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2987 } 2988 if (dirichlet) { /* Dirichlet */ 2989 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2990 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2991 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2992 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2993 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2994 /* need to be adapted? */ 2995 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2996 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2997 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2998 /* print info */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3005 } 3006 } 3007 if (neumann) { /* Neumann */ 3008 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3009 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3010 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3011 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3012 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3013 /* need to be adapted? */ 3014 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3015 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3016 /* print info */ 3017 if (pcbddc->dbg_flag) { 3018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3019 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3020 } 3021 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3022 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3023 } 3024 } 3025 } 3026 /* free Neumann problem's matrix */ 3027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3033 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3034 { 3035 PetscErrorCode ierr; 3036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3037 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3038 3039 PetscFunctionBegin; 3040 if (!sub_schurs->reuse_solver) { 3041 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3042 } 3043 if (!pcbddc->switch_static) { 3044 if (applytranspose && pcbddc->local_auxmat1) { 3045 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3047 } 3048 if (!sub_schurs->reuse_solver) { 3049 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3050 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3051 } else { 3052 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3053 3054 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 } 3057 } else { 3058 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3060 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3061 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3062 if (applytranspose && pcbddc->local_auxmat1) { 3063 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3064 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3065 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3066 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3067 } 3068 } 3069 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3070 if (applytranspose) { 3071 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3072 } else { 3073 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3074 } 3075 } else { 3076 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3077 3078 if (applytranspose) { 3079 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3080 } else { 3081 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3082 } 3083 } 3084 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3085 if (!pcbddc->switch_static) { 3086 if (!sub_schurs->reuse_solver) { 3087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 } else { 3090 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3091 3092 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3094 } 3095 if (!applytranspose && pcbddc->local_auxmat1) { 3096 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3097 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3098 } 3099 } else { 3100 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 if (!applytranspose && pcbddc->local_auxmat1) { 3105 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3106 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3107 } 3108 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3112 } 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3119 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3120 { 3121 PetscErrorCode ierr; 3122 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3123 PC_IS* pcis = (PC_IS*) (pc->data); 3124 const PetscScalar zero = 0.0; 3125 3126 PetscFunctionBegin; 3127 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3128 if (!pcbddc->benign_apply_coarse_only) { 3129 if (applytranspose) { 3130 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } else { 3133 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3134 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3135 } 3136 } else { 3137 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3138 } 3139 3140 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3141 if (pcbddc->benign_n) { 3142 PetscScalar *array; 3143 PetscInt j; 3144 3145 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3146 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3147 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3148 } 3149 3150 /* start communications from local primal nodes to rhs of coarse solver */ 3151 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3152 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3153 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3154 3155 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3156 /* TODO remove null space when doing multilevel */ 3157 if (pcbddc->coarse_ksp) { 3158 Mat coarse_mat; 3159 Vec rhs,sol; 3160 MatNullSpace nullsp; 3161 PetscBool isbddc = PETSC_FALSE; 3162 3163 if (pcbddc->benign_have_null) { 3164 PC coarse_pc; 3165 3166 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3167 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3168 /* we need to propagate to coarser levels the need for a possible benign correction */ 3169 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3170 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3171 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3172 } 3173 } 3174 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3175 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3176 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3177 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3178 if (nullsp) { 3179 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3180 } 3181 if (applytranspose) { 3182 if (pcbddc->benign_apply_coarse_only) { /* need just to apply the coarse preconditioner */ 3183 PC coarse_pc; 3184 3185 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3186 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3187 ierr = PCApplyTranspose(coarse_pc,rhs,sol);CHKERRQ(ierr); 3188 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3189 } else { 3190 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3191 } 3192 } else { 3193 if (pcbddc->benign_apply_coarse_only) { /* need just to apply the coarse preconditioner */ 3194 PC coarse_pc; 3195 3196 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3197 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3198 ierr = PCApply(coarse_pc,rhs,sol);CHKERRQ(ierr); 3199 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3200 } else { 3201 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3202 } 3203 } 3204 /* we don't the benign correction at coarser levels anymore */ 3205 if (pcbddc->benign_have_null && isbddc) { 3206 PC coarse_pc; 3207 PC_BDDC* coarsepcbddc; 3208 3209 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3210 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3211 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3212 } 3213 if (nullsp) { 3214 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3215 } 3216 } 3217 3218 /* Local solution on R nodes */ 3219 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3220 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3221 } 3222 /* communications from coarse sol to local primal nodes */ 3223 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3224 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3225 3226 /* Sum contributions from the two levels */ 3227 if (!pcbddc->benign_apply_coarse_only) { 3228 if (applytranspose) { 3229 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3230 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3231 } else { 3232 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3233 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3234 } 3235 /* store p0 */ 3236 if (pcbddc->benign_n) { 3237 PetscScalar *array; 3238 PetscInt j; 3239 3240 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3241 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3242 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3243 } 3244 } else { /* expand the coarse solution */ 3245 if (applytranspose) { 3246 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3247 } else { 3248 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3249 } 3250 } 3251 PetscFunctionReturn(0); 3252 } 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3256 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3257 { 3258 PetscErrorCode ierr; 3259 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3260 PetscScalar *array; 3261 Vec from,to; 3262 3263 PetscFunctionBegin; 3264 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3265 from = pcbddc->coarse_vec; 3266 to = pcbddc->vec1_P; 3267 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3268 Vec tvec; 3269 3270 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3271 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3272 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3273 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3274 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3275 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3276 } 3277 } else { /* from local to global -> put data in coarse right hand side */ 3278 from = pcbddc->vec1_P; 3279 to = pcbddc->coarse_vec; 3280 } 3281 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3287 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3288 { 3289 PetscErrorCode ierr; 3290 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3291 PetscScalar *array; 3292 Vec from,to; 3293 3294 PetscFunctionBegin; 3295 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3296 from = pcbddc->coarse_vec; 3297 to = pcbddc->vec1_P; 3298 } else { /* from local to global -> put data in coarse right hand side */ 3299 from = pcbddc->vec1_P; 3300 to = pcbddc->coarse_vec; 3301 } 3302 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3303 if (smode == SCATTER_FORWARD) { 3304 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3305 Vec tvec; 3306 3307 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3308 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3309 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3310 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3311 } 3312 } else { 3313 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3314 ierr = VecResetArray(from);CHKERRQ(ierr); 3315 } 3316 } 3317 PetscFunctionReturn(0); 3318 } 3319 3320 /* uncomment for testing purposes */ 3321 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3322 #undef __FUNCT__ 3323 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3324 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3325 { 3326 PetscErrorCode ierr; 3327 PC_IS* pcis = (PC_IS*)(pc->data); 3328 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3329 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3330 /* one and zero */ 3331 PetscScalar one=1.0,zero=0.0; 3332 /* space to store constraints and their local indices */ 3333 PetscScalar *constraints_data; 3334 PetscInt *constraints_idxs,*constraints_idxs_B; 3335 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3336 PetscInt *constraints_n; 3337 /* iterators */ 3338 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3339 /* BLAS integers */ 3340 PetscBLASInt lwork,lierr; 3341 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3342 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3343 /* reuse */ 3344 PetscInt olocal_primal_size,olocal_primal_size_cc; 3345 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3346 /* change of basis */ 3347 PetscBool qr_needed; 3348 PetscBT change_basis,qr_needed_idx; 3349 /* auxiliary stuff */ 3350 PetscInt *nnz,*is_indices; 3351 PetscInt ncc; 3352 /* some quantities */ 3353 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3354 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3355 3356 PetscFunctionBegin; 3357 /* Destroy Mat objects computed previously */ 3358 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3359 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3360 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3361 /* save info on constraints from previous setup (if any) */ 3362 olocal_primal_size = pcbddc->local_primal_size; 3363 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3364 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3365 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3366 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3367 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3368 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3369 3370 if (!pcbddc->adaptive_selection) { 3371 IS ISForVertices,*ISForFaces,*ISForEdges; 3372 MatNullSpace nearnullsp; 3373 const Vec *nearnullvecs; 3374 Vec *localnearnullsp; 3375 PetscScalar *array; 3376 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3377 PetscBool nnsp_has_cnst; 3378 /* LAPACK working arrays for SVD or POD */ 3379 PetscBool skip_lapack,boolforchange; 3380 PetscScalar *work; 3381 PetscReal *singular_vals; 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscReal *rwork; 3384 #endif 3385 #if defined(PETSC_MISSING_LAPACK_GESVD) 3386 PetscScalar *temp_basis,*correlation_mat; 3387 #else 3388 PetscBLASInt dummy_int=1; 3389 PetscScalar dummy_scalar=1.; 3390 #endif 3391 3392 /* Get index sets for faces, edges and vertices from graph */ 3393 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3394 /* print some info */ 3395 if (pcbddc->dbg_flag) { 3396 PetscInt nv; 3397 3398 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3399 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3400 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3406 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3407 } 3408 3409 /* free unneeded index sets */ 3410 if (!pcbddc->use_vertices) { 3411 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3412 } 3413 if (!pcbddc->use_edges) { 3414 for (i=0;i<n_ISForEdges;i++) { 3415 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3416 } 3417 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3418 n_ISForEdges = 0; 3419 } 3420 if (!pcbddc->use_faces) { 3421 for (i=0;i<n_ISForFaces;i++) { 3422 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3423 } 3424 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3425 n_ISForFaces = 0; 3426 } 3427 3428 #if defined(PETSC_USE_DEBUG) 3429 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3430 Also use_change_of_basis should be consistent among processors */ 3431 if (pcbddc->NullSpace) { 3432 PetscBool tbool[2],gbool[2]; 3433 3434 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3435 pcbddc->use_change_of_basis = PETSC_TRUE; 3436 if (!ISForEdges) { 3437 pcbddc->use_change_on_faces = PETSC_TRUE; 3438 } 3439 } 3440 tbool[0] = pcbddc->use_change_of_basis; 3441 tbool[1] = pcbddc->use_change_on_faces; 3442 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3443 pcbddc->use_change_of_basis = gbool[0]; 3444 pcbddc->use_change_on_faces = gbool[1]; 3445 } 3446 #endif 3447 3448 /* check if near null space is attached to global mat */ 3449 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3450 if (nearnullsp) { 3451 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3452 /* remove any stored info */ 3453 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3454 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3455 /* store information for BDDC solver reuse */ 3456 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3457 pcbddc->onearnullspace = nearnullsp; 3458 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3459 for (i=0;i<nnsp_size;i++) { 3460 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3461 } 3462 } else { /* if near null space is not provided BDDC uses constants by default */ 3463 nnsp_size = 0; 3464 nnsp_has_cnst = PETSC_TRUE; 3465 } 3466 /* get max number of constraints on a single cc */ 3467 max_constraints = nnsp_size; 3468 if (nnsp_has_cnst) max_constraints++; 3469 3470 /* 3471 Evaluate maximum storage size needed by the procedure 3472 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3473 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3474 There can be multiple constraints per connected component 3475 */ 3476 n_vertices = 0; 3477 if (ISForVertices) { 3478 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3479 } 3480 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3481 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3482 3483 total_counts = n_ISForFaces+n_ISForEdges; 3484 total_counts *= max_constraints; 3485 total_counts += n_vertices; 3486 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3487 3488 total_counts = 0; 3489 max_size_of_constraint = 0; 3490 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3491 IS used_is; 3492 if (i<n_ISForEdges) { 3493 used_is = ISForEdges[i]; 3494 } else { 3495 used_is = ISForFaces[i-n_ISForEdges]; 3496 } 3497 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3498 total_counts += j; 3499 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3500 } 3501 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3502 3503 /* get local part of global near null space vectors */ 3504 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3505 for (k=0;k<nnsp_size;k++) { 3506 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3507 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3508 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3509 } 3510 3511 /* whether or not to skip lapack calls */ 3512 skip_lapack = PETSC_TRUE; 3513 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3514 3515 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3516 if (!skip_lapack) { 3517 PetscScalar temp_work; 3518 3519 #if defined(PETSC_MISSING_LAPACK_GESVD) 3520 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3521 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3522 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3523 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3524 #if defined(PETSC_USE_COMPLEX) 3525 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3526 #endif 3527 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3528 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3529 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3530 lwork = -1; 3531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3532 #if !defined(PETSC_USE_COMPLEX) 3533 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3534 #else 3535 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3536 #endif 3537 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3538 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3539 #else /* on missing GESVD */ 3540 /* SVD */ 3541 PetscInt max_n,min_n; 3542 max_n = max_size_of_constraint; 3543 min_n = max_constraints; 3544 if (max_size_of_constraint < max_constraints) { 3545 min_n = max_size_of_constraint; 3546 max_n = max_constraints; 3547 } 3548 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3549 #if defined(PETSC_USE_COMPLEX) 3550 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3551 #endif 3552 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3553 lwork = -1; 3554 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3555 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3556 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3557 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3558 #if !defined(PETSC_USE_COMPLEX) 3559 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3560 #else 3561 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3562 #endif 3563 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3564 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3565 #endif /* on missing GESVD */ 3566 /* Allocate optimal workspace */ 3567 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3568 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3569 } 3570 /* Now we can loop on constraining sets */ 3571 total_counts = 0; 3572 constraints_idxs_ptr[0] = 0; 3573 constraints_data_ptr[0] = 0; 3574 /* vertices */ 3575 if (n_vertices) { 3576 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3577 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3578 for (i=0;i<n_vertices;i++) { 3579 constraints_n[total_counts] = 1; 3580 constraints_data[total_counts] = 1.0; 3581 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3582 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3583 total_counts++; 3584 } 3585 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3586 n_vertices = total_counts; 3587 } 3588 3589 /* edges and faces */ 3590 total_counts_cc = total_counts; 3591 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3592 IS used_is; 3593 PetscBool idxs_copied = PETSC_FALSE; 3594 3595 if (ncc<n_ISForEdges) { 3596 used_is = ISForEdges[ncc]; 3597 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3598 } else { 3599 used_is = ISForFaces[ncc-n_ISForEdges]; 3600 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3601 } 3602 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3603 3604 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3605 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3606 /* change of basis should not be performed on local periodic nodes */ 3607 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3608 if (nnsp_has_cnst) { 3609 PetscScalar quad_value; 3610 3611 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3612 idxs_copied = PETSC_TRUE; 3613 3614 if (!pcbddc->use_nnsp_true) { 3615 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3616 } else { 3617 quad_value = 1.0; 3618 } 3619 for (j=0;j<size_of_constraint;j++) { 3620 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3621 } 3622 temp_constraints++; 3623 total_counts++; 3624 } 3625 for (k=0;k<nnsp_size;k++) { 3626 PetscReal real_value; 3627 PetscScalar *ptr_to_data; 3628 3629 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3630 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3631 for (j=0;j<size_of_constraint;j++) { 3632 ptr_to_data[j] = array[is_indices[j]]; 3633 } 3634 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3635 /* check if array is null on the connected component */ 3636 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3637 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3638 if (real_value > 0.0) { /* keep indices and values */ 3639 temp_constraints++; 3640 total_counts++; 3641 if (!idxs_copied) { 3642 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3643 idxs_copied = PETSC_TRUE; 3644 } 3645 } 3646 } 3647 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3648 valid_constraints = temp_constraints; 3649 if (!pcbddc->use_nnsp_true && temp_constraints) { 3650 if (temp_constraints == 1) { /* just normalize the constraint */ 3651 PetscScalar norm,*ptr_to_data; 3652 3653 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3655 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3656 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3657 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3658 } else { /* perform SVD */ 3659 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3660 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3661 3662 #if defined(PETSC_MISSING_LAPACK_GESVD) 3663 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3664 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3665 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3666 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3667 from that computed using LAPACKgesvd 3668 -> This is due to a different computation of eigenvectors in LAPACKheev 3669 -> The quality of the POD-computed basis will be the same */ 3670 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3671 /* Store upper triangular part of correlation matrix */ 3672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3673 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3674 for (j=0;j<temp_constraints;j++) { 3675 for (k=0;k<j+1;k++) { 3676 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3677 } 3678 } 3679 /* compute eigenvalues and eigenvectors of correlation matrix */ 3680 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3681 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3682 #if !defined(PETSC_USE_COMPLEX) 3683 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3684 #else 3685 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3686 #endif 3687 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3688 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3689 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3690 j = 0; 3691 while (j < temp_constraints && singular_vals[j] < tol) j++; 3692 total_counts = total_counts-j; 3693 valid_constraints = temp_constraints-j; 3694 /* scale and copy POD basis into used quadrature memory */ 3695 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3696 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3697 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3699 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3701 if (j<temp_constraints) { 3702 PetscInt ii; 3703 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3705 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3706 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3707 for (k=0;k<temp_constraints-j;k++) { 3708 for (ii=0;ii<size_of_constraint;ii++) { 3709 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3710 } 3711 } 3712 } 3713 #else /* on missing GESVD */ 3714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3715 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3718 #if !defined(PETSC_USE_COMPLEX) 3719 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3720 #else 3721 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3722 #endif 3723 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3724 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3725 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3726 k = temp_constraints; 3727 if (k > size_of_constraint) k = size_of_constraint; 3728 j = 0; 3729 while (j < k && singular_vals[k-j-1] < tol) j++; 3730 valid_constraints = k-j; 3731 total_counts = total_counts-temp_constraints+valid_constraints; 3732 #endif /* on missing GESVD */ 3733 } 3734 } 3735 /* update pointers information */ 3736 if (valid_constraints) { 3737 constraints_n[total_counts_cc] = valid_constraints; 3738 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3739 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3740 /* set change_of_basis flag */ 3741 if (boolforchange) { 3742 PetscBTSet(change_basis,total_counts_cc); 3743 } 3744 total_counts_cc++; 3745 } 3746 } 3747 /* free workspace */ 3748 if (!skip_lapack) { 3749 ierr = PetscFree(work);CHKERRQ(ierr); 3750 #if defined(PETSC_USE_COMPLEX) 3751 ierr = PetscFree(rwork);CHKERRQ(ierr); 3752 #endif 3753 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3754 #if defined(PETSC_MISSING_LAPACK_GESVD) 3755 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3756 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3757 #endif 3758 } 3759 for (k=0;k<nnsp_size;k++) { 3760 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3761 } 3762 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3763 /* free index sets of faces, edges and vertices */ 3764 for (i=0;i<n_ISForFaces;i++) { 3765 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3766 } 3767 if (n_ISForFaces) { 3768 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3769 } 3770 for (i=0;i<n_ISForEdges;i++) { 3771 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3772 } 3773 if (n_ISForEdges) { 3774 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3775 } 3776 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3777 } else { 3778 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3779 3780 total_counts = 0; 3781 n_vertices = 0; 3782 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3783 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3784 } 3785 max_constraints = 0; 3786 total_counts_cc = 0; 3787 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3788 total_counts += pcbddc->adaptive_constraints_n[i]; 3789 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3790 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3791 } 3792 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3793 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3794 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3795 constraints_data = pcbddc->adaptive_constraints_data; 3796 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3797 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3798 total_counts_cc = 0; 3799 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3800 if (pcbddc->adaptive_constraints_n[i]) { 3801 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3802 } 3803 } 3804 #if 0 3805 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3806 for (i=0;i<total_counts_cc;i++) { 3807 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3808 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3809 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3810 printf(" %d",constraints_idxs[j]); 3811 } 3812 printf("\n"); 3813 printf("number of cc: %d\n",constraints_n[i]); 3814 } 3815 for (i=0;i<n_vertices;i++) { 3816 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3817 } 3818 for (i=0;i<sub_schurs->n_subs;i++) { 3819 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3820 } 3821 #endif 3822 3823 max_size_of_constraint = 0; 3824 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3825 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3826 /* Change of basis */ 3827 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3828 if (pcbddc->use_change_of_basis) { 3829 for (i=0;i<sub_schurs->n_subs;i++) { 3830 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3831 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3832 } 3833 } 3834 } 3835 } 3836 pcbddc->local_primal_size = total_counts; 3837 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3838 3839 /* map constraints_idxs in boundary numbering */ 3840 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3841 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3842 3843 /* Create constraint matrix */ 3844 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3845 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3846 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3847 3848 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3849 /* determine if a QR strategy is needed for change of basis */ 3850 qr_needed = PETSC_FALSE; 3851 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3852 total_primal_vertices=0; 3853 pcbddc->local_primal_size_cc = 0; 3854 for (i=0;i<total_counts_cc;i++) { 3855 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3856 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3857 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3858 pcbddc->local_primal_size_cc += 1; 3859 } else if (PetscBTLookup(change_basis,i)) { 3860 for (k=0;k<constraints_n[i];k++) { 3861 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3862 } 3863 pcbddc->local_primal_size_cc += constraints_n[i]; 3864 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3865 PetscBTSet(qr_needed_idx,i); 3866 qr_needed = PETSC_TRUE; 3867 } 3868 } else { 3869 pcbddc->local_primal_size_cc += 1; 3870 } 3871 } 3872 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3873 pcbddc->n_vertices = total_primal_vertices; 3874 /* permute indices in order to have a sorted set of vertices */ 3875 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3876 3877 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3878 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3879 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3880 3881 /* nonzero structure of constraint matrix */ 3882 /* and get reference dof for local constraints */ 3883 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3884 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3885 3886 j = total_primal_vertices; 3887 total_counts = total_primal_vertices; 3888 cum = total_primal_vertices; 3889 for (i=n_vertices;i<total_counts_cc;i++) { 3890 if (!PetscBTLookup(change_basis,i)) { 3891 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3892 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3893 cum++; 3894 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3895 for (k=0;k<constraints_n[i];k++) { 3896 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3897 nnz[j+k] = size_of_constraint; 3898 } 3899 j += constraints_n[i]; 3900 } 3901 } 3902 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3903 ierr = PetscFree(nnz);CHKERRQ(ierr); 3904 3905 /* set values in constraint matrix */ 3906 for (i=0;i<total_primal_vertices;i++) { 3907 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3908 } 3909 total_counts = total_primal_vertices; 3910 for (i=n_vertices;i<total_counts_cc;i++) { 3911 if (!PetscBTLookup(change_basis,i)) { 3912 PetscInt *cols; 3913 3914 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3915 cols = constraints_idxs+constraints_idxs_ptr[i]; 3916 for (k=0;k<constraints_n[i];k++) { 3917 PetscInt row = total_counts+k; 3918 PetscScalar *vals; 3919 3920 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3921 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3922 } 3923 total_counts += constraints_n[i]; 3924 } 3925 } 3926 /* assembling */ 3927 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3928 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3929 3930 /* 3931 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3932 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3933 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3934 */ 3935 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3936 if (pcbddc->use_change_of_basis) { 3937 /* dual and primal dofs on a single cc */ 3938 PetscInt dual_dofs,primal_dofs; 3939 /* working stuff for GEQRF */ 3940 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3941 PetscBLASInt lqr_work; 3942 /* working stuff for UNGQR */ 3943 PetscScalar *gqr_work,lgqr_work_t; 3944 PetscBLASInt lgqr_work; 3945 /* working stuff for TRTRS */ 3946 PetscScalar *trs_rhs; 3947 PetscBLASInt Blas_NRHS; 3948 /* pointers for values insertion into change of basis matrix */ 3949 PetscInt *start_rows,*start_cols; 3950 PetscScalar *start_vals; 3951 /* working stuff for values insertion */ 3952 PetscBT is_primal; 3953 PetscInt *aux_primal_numbering_B; 3954 /* matrix sizes */ 3955 PetscInt global_size,local_size; 3956 /* temporary change of basis */ 3957 Mat localChangeOfBasisMatrix; 3958 /* extra space for debugging */ 3959 PetscScalar *dbg_work; 3960 3961 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3962 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3963 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3964 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3965 /* nonzeros for local mat */ 3966 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3967 if (!pcbddc->benign_change || pcbddc->fake_change) { 3968 for (i=0;i<pcis->n;i++) nnz[i]=1; 3969 } else { 3970 const PetscInt *ii; 3971 PetscInt n; 3972 PetscBool flg_row; 3973 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3974 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3975 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3976 } 3977 for (i=n_vertices;i<total_counts_cc;i++) { 3978 if (PetscBTLookup(change_basis,i)) { 3979 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3980 if (PetscBTLookup(qr_needed_idx,i)) { 3981 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3982 } else { 3983 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3984 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3985 } 3986 } 3987 } 3988 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3989 ierr = PetscFree(nnz);CHKERRQ(ierr); 3990 /* Set interior change in the matrix */ 3991 if (!pcbddc->benign_change || pcbddc->fake_change) { 3992 for (i=0;i<pcis->n;i++) { 3993 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3994 } 3995 } else { 3996 const PetscInt *ii,*jj; 3997 PetscScalar *aa; 3998 PetscInt n; 3999 PetscBool flg_row; 4000 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4001 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4002 for (i=0;i<n;i++) { 4003 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4004 } 4005 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4006 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4007 } 4008 4009 if (pcbddc->dbg_flag) { 4010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4012 } 4013 4014 4015 /* Now we loop on the constraints which need a change of basis */ 4016 /* 4017 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4018 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4019 4020 Basic blocks of change of basis matrix T computed by 4021 4022 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4023 4024 | 1 0 ... 0 s_1/S | 4025 | 0 1 ... 0 s_2/S | 4026 | ... | 4027 | 0 ... 1 s_{n-1}/S | 4028 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4029 4030 with S = \sum_{i=1}^n s_i^2 4031 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4032 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4033 4034 - QR decomposition of constraints otherwise 4035 */ 4036 if (qr_needed) { 4037 /* space to store Q */ 4038 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4039 /* first we issue queries for optimal work */ 4040 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4041 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4042 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4043 lqr_work = -1; 4044 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4045 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4047 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4048 lgqr_work = -1; 4049 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4050 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4051 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4052 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4053 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4054 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4055 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4056 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4057 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4058 /* array to store scaling factors for reflectors */ 4059 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4060 /* array to store rhs and solution of triangular solver */ 4061 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4062 /* allocating workspace for check */ 4063 if (pcbddc->dbg_flag) { 4064 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4065 } 4066 } 4067 /* array to store whether a node is primal or not */ 4068 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4069 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4070 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4071 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4072 for (i=0;i<total_primal_vertices;i++) { 4073 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4074 } 4075 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4076 4077 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4078 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4079 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4080 if (PetscBTLookup(change_basis,total_counts)) { 4081 /* get constraint info */ 4082 primal_dofs = constraints_n[total_counts]; 4083 dual_dofs = size_of_constraint-primal_dofs; 4084 4085 if (pcbddc->dbg_flag) { 4086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4087 } 4088 4089 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4090 4091 /* copy quadrature constraints for change of basis check */ 4092 if (pcbddc->dbg_flag) { 4093 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4094 } 4095 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4096 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4097 4098 /* compute QR decomposition of constraints */ 4099 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4100 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4101 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4103 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4105 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4106 4107 /* explictly compute R^-T */ 4108 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4109 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4110 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4111 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4113 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4115 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4116 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4117 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4118 4119 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4122 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4124 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4125 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4126 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4128 4129 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4130 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4131 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4133 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4134 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4136 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4139 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4140 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4141 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4142 4143 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4144 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4145 /* insert cols for primal dofs */ 4146 for (j=0;j<primal_dofs;j++) { 4147 start_vals = &qr_basis[j*size_of_constraint]; 4148 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4149 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4150 } 4151 /* insert cols for dual dofs */ 4152 for (j=0,k=0;j<dual_dofs;k++) { 4153 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4154 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4155 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4156 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4157 j++; 4158 } 4159 } 4160 4161 /* check change of basis */ 4162 if (pcbddc->dbg_flag) { 4163 PetscInt ii,jj; 4164 PetscBool valid_qr=PETSC_TRUE; 4165 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4167 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4170 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4171 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4172 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4174 for (jj=0;jj<size_of_constraint;jj++) { 4175 for (ii=0;ii<primal_dofs;ii++) { 4176 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4177 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4178 } 4179 } 4180 if (!valid_qr) { 4181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4182 for (jj=0;jj<size_of_constraint;jj++) { 4183 for (ii=0;ii<primal_dofs;ii++) { 4184 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4185 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4186 } 4187 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4189 } 4190 } 4191 } 4192 } else { 4193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4194 } 4195 } 4196 } else { /* simple transformation block */ 4197 PetscInt row,col; 4198 PetscScalar val,norm; 4199 4200 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4201 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4202 for (j=0;j<size_of_constraint;j++) { 4203 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4204 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4205 if (!PetscBTLookup(is_primal,row_B)) { 4206 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4207 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4208 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4209 } else { 4210 for (k=0;k<size_of_constraint;k++) { 4211 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4212 if (row != col) { 4213 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4214 } else { 4215 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4216 } 4217 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4218 } 4219 } 4220 } 4221 if (pcbddc->dbg_flag) { 4222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4223 } 4224 } 4225 } else { 4226 if (pcbddc->dbg_flag) { 4227 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4228 } 4229 } 4230 } 4231 4232 /* free workspace */ 4233 if (qr_needed) { 4234 if (pcbddc->dbg_flag) { 4235 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4236 } 4237 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4238 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4239 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4240 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4241 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4242 } 4243 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4244 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4245 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4246 4247 /* assembling of global change of variable */ 4248 if (!pcbddc->fake_change) { 4249 Mat tmat; 4250 PetscInt bs; 4251 4252 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4253 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4254 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4255 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4256 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4257 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4258 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4259 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4260 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4261 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4262 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4263 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4264 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4265 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4266 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4267 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4268 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4269 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4270 4271 /* check */ 4272 if (pcbddc->dbg_flag) { 4273 PetscReal error; 4274 Vec x,x_change; 4275 4276 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4277 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4278 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4279 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4280 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4281 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4282 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4283 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4284 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4285 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4286 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4287 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4289 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4290 ierr = VecDestroy(&x);CHKERRQ(ierr); 4291 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4292 } 4293 /* adapt sub_schurs computed (if any) */ 4294 if (pcbddc->use_deluxe_scaling) { 4295 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4296 4297 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4298 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4299 } 4300 if (sub_schurs->S_Ej_all) { 4301 Mat S_new,tmat; 4302 IS is_all_N,is_V_Sall = NULL; 4303 4304 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4305 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4306 if (pcbddc->deluxe_zerorows) { 4307 ISLocalToGlobalMapping NtoSall; 4308 IS is_V; 4309 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4310 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4311 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4312 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4313 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4314 } 4315 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4316 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4317 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4318 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4319 if (pcbddc->deluxe_zerorows) { 4320 const PetscScalar *array; 4321 const PetscInt *idxs_V,*idxs_all; 4322 PetscInt i,n_V; 4323 4324 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4325 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4326 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4327 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4328 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4329 for (i=0;i<n_V;i++) { 4330 PetscScalar val; 4331 PetscInt idx; 4332 4333 idx = idxs_V[i]; 4334 val = array[idxs_all[idxs_V[i]]]; 4335 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4336 } 4337 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4338 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4339 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4340 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4341 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4342 } 4343 sub_schurs->S_Ej_all = S_new; 4344 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4345 if (sub_schurs->sum_S_Ej_all) { 4346 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4347 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4348 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4349 if (pcbddc->deluxe_zerorows) { 4350 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4351 } 4352 sub_schurs->sum_S_Ej_all = S_new; 4353 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4354 } 4355 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4356 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4357 } 4358 /* destroy any change of basis context in sub_schurs */ 4359 if (sub_schurs->change) { 4360 PetscInt i; 4361 4362 for (i=0;i<sub_schurs->n_subs;i++) { 4363 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4364 } 4365 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4366 } 4367 } 4368 if (pcbddc->switch_static) { /* need to save the local change */ 4369 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4370 } else { 4371 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4372 } 4373 /* determine if any process has changed the pressures locally */ 4374 pcbddc->change_interior = pcbddc->benign_have_null; 4375 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4376 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4377 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4378 pcbddc->use_qr_single = qr_needed; 4379 } 4380 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4381 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4382 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4383 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4384 } else { 4385 Mat benign_global = NULL; 4386 if (pcbddc->benign_have_null) { 4387 Mat tmat; 4388 4389 pcbddc->change_interior = PETSC_TRUE; 4390 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4391 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4392 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4393 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4394 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4395 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4396 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4397 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4398 if (pcbddc->benign_change) { 4399 Mat M; 4400 4401 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4402 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4403 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4404 ierr = MatDestroy(&M);CHKERRQ(ierr); 4405 } else { 4406 Mat eye; 4407 PetscScalar *array; 4408 4409 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4410 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4411 for (i=0;i<pcis->n;i++) { 4412 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4413 } 4414 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4415 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4416 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4417 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4418 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4419 } 4420 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4421 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4422 } 4423 if (pcbddc->user_ChangeOfBasisMatrix) { 4424 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4425 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4426 } else if (pcbddc->benign_have_null) { 4427 pcbddc->ChangeOfBasisMatrix = benign_global; 4428 } 4429 } 4430 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4431 IS is_global; 4432 const PetscInt *gidxs; 4433 4434 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4435 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4436 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4437 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4438 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4439 } 4440 } 4441 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4442 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4443 } 4444 4445 if (!pcbddc->fake_change) { 4446 /* add pressure dofs to set of primal nodes for numbering purposes */ 4447 for (i=0;i<pcbddc->benign_n;i++) { 4448 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4449 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4450 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4451 pcbddc->local_primal_size_cc++; 4452 pcbddc->local_primal_size++; 4453 } 4454 4455 /* check if a new primal space has been introduced (also take into account benign trick) */ 4456 pcbddc->new_primal_space_local = PETSC_TRUE; 4457 if (olocal_primal_size == pcbddc->local_primal_size) { 4458 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4459 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4460 if (!pcbddc->new_primal_space_local) { 4461 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4462 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4463 } 4464 } 4465 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4466 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4467 } 4468 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4469 4470 /* flush dbg viewer */ 4471 if (pcbddc->dbg_flag) { 4472 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4473 } 4474 4475 /* free workspace */ 4476 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4477 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4478 if (!pcbddc->adaptive_selection) { 4479 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4480 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4481 } else { 4482 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4483 pcbddc->adaptive_constraints_idxs_ptr, 4484 pcbddc->adaptive_constraints_data_ptr, 4485 pcbddc->adaptive_constraints_idxs, 4486 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4487 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4488 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4489 } 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #undef __FUNCT__ 4494 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4495 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4496 { 4497 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4498 PC_IS *pcis = (PC_IS*)pc->data; 4499 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4500 PetscInt ierr,i,N; 4501 4502 PetscFunctionBegin; 4503 /* Reset previously computed graph */ 4504 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4505 /* Init local Graph struct */ 4506 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4507 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4508 4509 /* Check validity of the csr graph passed in by the user */ 4510 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4511 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4512 } 4513 4514 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4515 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4516 PetscInt *xadj,*adjncy; 4517 PetscInt nvtxs; 4518 PetscBool flg_row=PETSC_FALSE; 4519 4520 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4521 if (flg_row) { 4522 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4523 pcbddc->computed_rowadj = PETSC_TRUE; 4524 } 4525 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4526 } 4527 if (pcbddc->dbg_flag) { 4528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4529 } 4530 4531 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4532 if (pcbddc->user_provided_isfordofs) { 4533 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4534 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4535 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4536 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4537 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4538 } 4539 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4540 pcbddc->n_ISForDofs = 0; 4541 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4542 } 4543 } else { 4544 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4545 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4546 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4547 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4548 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4549 } 4550 } 4551 } 4552 4553 /* Setup of Graph */ 4554 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4555 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4556 } 4557 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4558 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4559 } 4560 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4561 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4562 } 4563 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4564 4565 /* attach info on disconnected subdomains if present */ 4566 if (pcbddc->n_local_subs) { 4567 PetscInt *local_subs; 4568 4569 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4570 for (i=0;i<pcbddc->n_local_subs;i++) { 4571 const PetscInt *idxs; 4572 PetscInt nl,j; 4573 4574 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4575 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4576 for (j=0;j<nl;j++) { 4577 local_subs[idxs[j]] = i; 4578 } 4579 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4580 } 4581 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4582 pcbddc->mat_graph->local_subs = local_subs; 4583 } 4584 4585 /* Graph's connected components analysis */ 4586 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "PCBDDCSubsetNumbering" 4593 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4594 { 4595 PetscSF sf; 4596 PetscLayout map; 4597 const PetscInt *idxs; 4598 PetscInt *leaf_data,*root_data,*gidxs; 4599 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4600 PetscInt n_n,nlocals,start,first_index; 4601 PetscMPIInt commsize; 4602 PetscBool first_found; 4603 PetscErrorCode ierr; 4604 4605 PetscFunctionBegin; 4606 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4607 if (subset_mult) { 4608 PetscCheckSameComm(subset,1,subset_mult,2); 4609 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4610 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4611 } 4612 /* create workspace layout for computing global indices of subset */ 4613 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4614 lbounds[0] = lbounds[1] = 0; 4615 for (i=0;i<n;i++) { 4616 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4617 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4618 } 4619 lbounds[0] = -lbounds[0]; 4620 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4621 gbounds[0] = -gbounds[0]; 4622 N = gbounds[1] - gbounds[0] + 1; 4623 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4624 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4625 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4626 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4627 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4628 4629 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4630 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4631 if (subset_mult) { 4632 const PetscInt* idxs_mult; 4633 4634 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4635 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4636 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4637 } else { 4638 for (i=0;i<n;i++) leaf_data[i] = 1; 4639 } 4640 /* local size of new subset */ 4641 n_n = 0; 4642 for (i=0;i<n;i++) n_n += leaf_data[i]; 4643 4644 /* global indexes in layout */ 4645 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4646 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4647 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4649 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4650 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4651 4652 /* reduce from leaves to roots */ 4653 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4654 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4655 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4656 4657 /* count indexes in local part of layout */ 4658 nlocals = 0; 4659 first_index = -1; 4660 first_found = PETSC_FALSE; 4661 for (i=0;i<Nl;i++) { 4662 if (!first_found && root_data[i]) { 4663 first_found = PETSC_TRUE; 4664 first_index = i; 4665 } 4666 nlocals += root_data[i]; 4667 } 4668 4669 /* cumulative of number of indexes and size of subset without holes */ 4670 #if defined(PETSC_HAVE_MPI_EXSCAN) 4671 start = 0; 4672 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4673 #else 4674 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4675 start = start-nlocals; 4676 #endif 4677 4678 if (N_n) { /* compute total size of new subset if requested */ 4679 *N_n = start + nlocals; 4680 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4681 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4682 } 4683 4684 /* adapt root data with cumulative */ 4685 if (first_found) { 4686 PetscInt old_index; 4687 4688 root_data[first_index] += start; 4689 old_index = first_index; 4690 for (i=first_index+1;i<Nl;i++) { 4691 if (root_data[i]) { 4692 root_data[i] += root_data[old_index]; 4693 old_index = i; 4694 } 4695 } 4696 } 4697 4698 /* from roots to leaves */ 4699 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4700 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4701 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4702 4703 /* create new IS with global indexes without holes */ 4704 if (subset_mult) { 4705 const PetscInt* idxs_mult; 4706 PetscInt cum; 4707 4708 cum = 0; 4709 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4710 for (i=0;i<n;i++) { 4711 PetscInt j; 4712 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4713 } 4714 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4715 } else { 4716 for (i=0;i<n;i++) { 4717 gidxs[i] = leaf_data[i]-1; 4718 } 4719 } 4720 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4721 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4722 PetscFunctionReturn(0); 4723 } 4724 4725 #undef __FUNCT__ 4726 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4727 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4728 { 4729 PetscInt i,j; 4730 PetscScalar *alphas; 4731 PetscErrorCode ierr; 4732 4733 PetscFunctionBegin; 4734 /* this implements stabilized Gram-Schmidt */ 4735 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4736 for (i=0;i<n;i++) { 4737 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4738 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4739 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4740 } 4741 ierr = PetscFree(alphas);CHKERRQ(ierr); 4742 PetscFunctionReturn(0); 4743 } 4744 4745 #undef __FUNCT__ 4746 #define __FUNCT__ "MatISGetSubassemblingPattern" 4747 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4748 { 4749 Mat A; 4750 PetscInt n_neighs,*neighs,*n_shared,**shared; 4751 PetscMPIInt size,rank,color; 4752 PetscInt *xadj,*adjncy; 4753 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4754 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4755 PetscInt void_procs,*procs_candidates = NULL; 4756 PetscInt xadj_count, *count; 4757 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4758 PetscSubcomm psubcomm; 4759 MPI_Comm subcomm; 4760 PetscErrorCode ierr; 4761 4762 PetscFunctionBegin; 4763 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4764 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4765 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4766 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4767 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4768 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4769 4770 if (have_void) *have_void = PETSC_FALSE; 4771 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4772 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4773 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4774 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4775 im_active = !!(n); 4776 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4777 void_procs = size - active_procs; 4778 /* get ranks of of non-active processes in mat communicator */ 4779 if (void_procs) { 4780 PetscInt ncand; 4781 4782 if (have_void) *have_void = PETSC_TRUE; 4783 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4784 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4785 for (i=0,ncand=0;i<size;i++) { 4786 if (!procs_candidates[i]) { 4787 procs_candidates[ncand++] = i; 4788 } 4789 } 4790 /* force n_subdomains to be not greater that the number of non-active processes */ 4791 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4792 } 4793 4794 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4795 if (active_procs < *n_subdomains) { 4796 PetscInt issize,isidx; 4797 if (im_active) { 4798 issize = 1; 4799 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4800 isidx = procs_candidates[rank]; 4801 } else { 4802 isidx = rank; 4803 } 4804 } else { 4805 issize = 0; 4806 isidx = -1; 4807 } 4808 *n_subdomains = active_procs; 4809 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4810 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4811 PetscFunctionReturn(0); 4812 } 4813 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4814 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4815 threshold = PetscMax(threshold,2); 4816 4817 /* Get info on mapping */ 4818 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4819 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4820 4821 /* build local CSR graph of subdomains' connectivity */ 4822 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4823 xadj[0] = 0; 4824 xadj[1] = PetscMax(n_neighs-1,0); 4825 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4826 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4827 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4828 for (i=1;i<n_neighs;i++) 4829 for (j=0;j<n_shared[i];j++) 4830 count[shared[i][j]] += 1; 4831 4832 xadj_count = 0; 4833 for (i=1;i<n_neighs;i++) { 4834 for (j=0;j<n_shared[i];j++) { 4835 if (count[shared[i][j]] < threshold) { 4836 adjncy[xadj_count] = neighs[i]; 4837 adjncy_wgt[xadj_count] = n_shared[i]; 4838 xadj_count++; 4839 break; 4840 } 4841 } 4842 } 4843 xadj[1] = xadj_count; 4844 ierr = PetscFree(count);CHKERRQ(ierr); 4845 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4846 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4847 4848 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4849 4850 /* Restrict work on active processes only */ 4851 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4852 if (void_procs) { 4853 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4854 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4855 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4856 subcomm = PetscSubcommChild(psubcomm); 4857 } else { 4858 psubcomm = NULL; 4859 subcomm = PetscObjectComm((PetscObject)mat); 4860 } 4861 4862 v_wgt = NULL; 4863 if (!color) { 4864 ierr = PetscFree(xadj);CHKERRQ(ierr); 4865 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4866 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4867 } else { 4868 Mat subdomain_adj; 4869 IS new_ranks,new_ranks_contig; 4870 MatPartitioning partitioner; 4871 PetscInt rstart=0,rend=0; 4872 PetscInt *is_indices,*oldranks; 4873 PetscMPIInt size; 4874 PetscBool aggregate; 4875 4876 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4877 if (void_procs) { 4878 PetscInt prank = rank; 4879 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4880 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4881 for (i=0;i<xadj[1];i++) { 4882 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4883 } 4884 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4885 } else { 4886 oldranks = NULL; 4887 } 4888 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4889 if (aggregate) { /* TODO: all this part could be made more efficient */ 4890 PetscInt lrows,row,ncols,*cols; 4891 PetscMPIInt nrank; 4892 PetscScalar *vals; 4893 4894 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 4895 lrows = 0; 4896 if (nrank<redprocs) { 4897 lrows = size/redprocs; 4898 if (nrank<size%redprocs) lrows++; 4899 } 4900 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4901 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4902 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4903 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4904 row = nrank; 4905 ncols = xadj[1]-xadj[0]; 4906 cols = adjncy; 4907 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4908 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4909 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4910 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4911 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4912 ierr = PetscFree(xadj);CHKERRQ(ierr); 4913 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4914 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4915 ierr = PetscFree(vals);CHKERRQ(ierr); 4916 if (use_vwgt) { 4917 Vec v; 4918 const PetscScalar *array; 4919 PetscInt nl; 4920 4921 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 4922 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 4923 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4924 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4925 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 4926 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 4927 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 4928 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 4929 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 4930 ierr = VecDestroy(&v);CHKERRQ(ierr); 4931 } 4932 } else { 4933 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4934 if (use_vwgt) { 4935 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4936 v_wgt[0] = local_size; 4937 } 4938 } 4939 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4940 4941 /* Partition */ 4942 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 4943 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4944 if (v_wgt) { 4945 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4946 } 4947 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4948 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4949 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4950 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4951 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4952 4953 /* renumber new_ranks to avoid "holes" in new set of processors */ 4954 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4955 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4956 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4957 if (!aggregate) { 4958 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4959 #if defined(PETSC_USE_DEBUG) 4960 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4961 #endif 4962 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4963 } else if (oldranks) { 4964 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4965 } else { 4966 ranks_send_to_idx[0] = is_indices[0]; 4967 } 4968 } else { 4969 PetscInt idxs[1]; 4970 PetscMPIInt tag; 4971 MPI_Request *reqs; 4972 4973 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4974 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4975 for (i=rstart;i<rend;i++) { 4976 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 4977 } 4978 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 4979 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4980 ierr = PetscFree(reqs);CHKERRQ(ierr); 4981 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4982 #if defined(PETSC_USE_DEBUG) 4983 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4984 #endif 4985 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4986 } else if (oldranks) { 4987 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4988 } else { 4989 ranks_send_to_idx[0] = idxs[0]; 4990 } 4991 } 4992 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4993 /* clean up */ 4994 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4995 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4996 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4997 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4998 } 4999 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5000 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5001 5002 /* assemble parallel IS for sends */ 5003 i = 1; 5004 if (!color) i=0; 5005 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5006 PetscFunctionReturn(0); 5007 } 5008 5009 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5010 5011 #undef __FUNCT__ 5012 #define __FUNCT__ "MatISSubassemble" 5013 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5014 { 5015 Mat local_mat; 5016 IS is_sends_internal; 5017 PetscInt rows,cols,new_local_rows; 5018 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5019 PetscBool ismatis,isdense,newisdense,destroy_mat; 5020 ISLocalToGlobalMapping l2gmap; 5021 PetscInt* l2gmap_indices; 5022 const PetscInt* is_indices; 5023 MatType new_local_type; 5024 /* buffers */ 5025 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5026 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5027 PetscInt *recv_buffer_idxs_local; 5028 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5029 /* MPI */ 5030 MPI_Comm comm,comm_n; 5031 PetscSubcomm subcomm; 5032 PetscMPIInt n_sends,n_recvs,commsize; 5033 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5034 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5035 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5036 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5037 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5038 PetscErrorCode ierr; 5039 5040 PetscFunctionBegin; 5041 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5042 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5043 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5044 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5045 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5046 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5047 PetscValidLogicalCollectiveBool(mat,reuse,6); 5048 PetscValidLogicalCollectiveInt(mat,nis,8); 5049 5050 /* further checks */ 5051 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5052 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5053 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5054 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5055 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5056 if (reuse && *mat_n) { 5057 PetscInt mrows,mcols,mnrows,mncols; 5058 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5059 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5060 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5061 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5062 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5063 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5064 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5065 } 5066 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5067 PetscValidLogicalCollectiveInt(mat,bs,0); 5068 5069 /* prepare IS for sending if not provided */ 5070 if (!is_sends) { 5071 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5072 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5073 } else { 5074 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5075 is_sends_internal = is_sends; 5076 } 5077 5078 /* get comm */ 5079 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5080 5081 /* compute number of sends */ 5082 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5083 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5084 5085 /* compute number of receives */ 5086 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5087 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5088 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5089 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5090 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5091 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5092 ierr = PetscFree(iflags);CHKERRQ(ierr); 5093 5094 /* restrict comm if requested */ 5095 subcomm = 0; 5096 destroy_mat = PETSC_FALSE; 5097 if (restrict_comm) { 5098 PetscMPIInt color,subcommsize; 5099 5100 color = 0; 5101 if (restrict_full) { 5102 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5103 } else { 5104 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5105 } 5106 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5107 subcommsize = commsize - subcommsize; 5108 /* check if reuse has been requested */ 5109 if (reuse) { 5110 if (*mat_n) { 5111 PetscMPIInt subcommsize2; 5112 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5113 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5114 comm_n = PetscObjectComm((PetscObject)*mat_n); 5115 } else { 5116 comm_n = PETSC_COMM_SELF; 5117 } 5118 } else { /* MAT_INITIAL_MATRIX */ 5119 PetscMPIInt rank; 5120 5121 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5122 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5123 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5124 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5125 comm_n = PetscSubcommChild(subcomm); 5126 } 5127 /* flag to destroy *mat_n if not significative */ 5128 if (color) destroy_mat = PETSC_TRUE; 5129 } else { 5130 comm_n = comm; 5131 } 5132 5133 /* prepare send/receive buffers */ 5134 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5135 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5136 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5137 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5138 if (nis) { 5139 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5140 } 5141 5142 /* Get data from local matrices */ 5143 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5144 /* TODO: See below some guidelines on how to prepare the local buffers */ 5145 /* 5146 send_buffer_vals should contain the raw values of the local matrix 5147 send_buffer_idxs should contain: 5148 - MatType_PRIVATE type 5149 - PetscInt size_of_l2gmap 5150 - PetscInt global_row_indices[size_of_l2gmap] 5151 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5152 */ 5153 else { 5154 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5155 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5156 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5157 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5158 send_buffer_idxs[1] = i; 5159 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5160 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5161 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5162 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5163 for (i=0;i<n_sends;i++) { 5164 ilengths_vals[is_indices[i]] = len*len; 5165 ilengths_idxs[is_indices[i]] = len+2; 5166 } 5167 } 5168 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5169 /* additional is (if any) */ 5170 if (nis) { 5171 PetscMPIInt psum; 5172 PetscInt j; 5173 for (j=0,psum=0;j<nis;j++) { 5174 PetscInt plen; 5175 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5176 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5177 psum += len+1; /* indices + lenght */ 5178 } 5179 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5180 for (j=0,psum=0;j<nis;j++) { 5181 PetscInt plen; 5182 const PetscInt *is_array_idxs; 5183 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5184 send_buffer_idxs_is[psum] = plen; 5185 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5186 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5187 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5188 psum += plen+1; /* indices + lenght */ 5189 } 5190 for (i=0;i<n_sends;i++) { 5191 ilengths_idxs_is[is_indices[i]] = psum; 5192 } 5193 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5194 } 5195 5196 buf_size_idxs = 0; 5197 buf_size_vals = 0; 5198 buf_size_idxs_is = 0; 5199 for (i=0;i<n_recvs;i++) { 5200 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5201 buf_size_vals += (PetscInt)olengths_vals[i]; 5202 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5203 } 5204 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5205 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5206 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5207 5208 /* get new tags for clean communications */ 5209 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5210 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5211 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5212 5213 /* allocate for requests */ 5214 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5215 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5216 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5218 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5219 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5220 5221 /* communications */ 5222 ptr_idxs = recv_buffer_idxs; 5223 ptr_vals = recv_buffer_vals; 5224 ptr_idxs_is = recv_buffer_idxs_is; 5225 for (i=0;i<n_recvs;i++) { 5226 source_dest = onodes[i]; 5227 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5228 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5229 ptr_idxs += olengths_idxs[i]; 5230 ptr_vals += olengths_vals[i]; 5231 if (nis) { 5232 source_dest = onodes_is[i]; 5233 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5234 ptr_idxs_is += olengths_idxs_is[i]; 5235 } 5236 } 5237 for (i=0;i<n_sends;i++) { 5238 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5239 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5240 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5241 if (nis) { 5242 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5243 } 5244 } 5245 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5246 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5247 5248 /* assemble new l2g map */ 5249 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5250 ptr_idxs = recv_buffer_idxs; 5251 new_local_rows = 0; 5252 for (i=0;i<n_recvs;i++) { 5253 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5254 ptr_idxs += olengths_idxs[i]; 5255 } 5256 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5257 ptr_idxs = recv_buffer_idxs; 5258 new_local_rows = 0; 5259 for (i=0;i<n_recvs;i++) { 5260 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5261 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5262 ptr_idxs += olengths_idxs[i]; 5263 } 5264 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5265 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5266 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5267 5268 /* infer new local matrix type from received local matrices type */ 5269 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5270 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5271 if (n_recvs) { 5272 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5273 ptr_idxs = recv_buffer_idxs; 5274 for (i=0;i<n_recvs;i++) { 5275 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5276 new_local_type_private = MATAIJ_PRIVATE; 5277 break; 5278 } 5279 ptr_idxs += olengths_idxs[i]; 5280 } 5281 switch (new_local_type_private) { 5282 case MATDENSE_PRIVATE: 5283 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5284 new_local_type = MATSEQAIJ; 5285 bs = 1; 5286 } else { /* if I receive only 1 dense matrix */ 5287 new_local_type = MATSEQDENSE; 5288 bs = 1; 5289 } 5290 break; 5291 case MATAIJ_PRIVATE: 5292 new_local_type = MATSEQAIJ; 5293 bs = 1; 5294 break; 5295 case MATBAIJ_PRIVATE: 5296 new_local_type = MATSEQBAIJ; 5297 break; 5298 case MATSBAIJ_PRIVATE: 5299 new_local_type = MATSEQSBAIJ; 5300 break; 5301 default: 5302 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5303 break; 5304 } 5305 } else { /* by default, new_local_type is seqdense */ 5306 new_local_type = MATSEQDENSE; 5307 bs = 1; 5308 } 5309 5310 /* create MATIS object if needed */ 5311 if (!reuse) { 5312 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5313 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5314 } else { 5315 /* it also destroys the local matrices */ 5316 if (*mat_n) { 5317 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5318 } else { /* this is a fake object */ 5319 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5320 } 5321 } 5322 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5323 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5324 5325 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5326 5327 /* Global to local map of received indices */ 5328 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5329 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5330 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5331 5332 /* restore attributes -> type of incoming data and its size */ 5333 buf_size_idxs = 0; 5334 for (i=0;i<n_recvs;i++) { 5335 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5336 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5337 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5338 } 5339 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5340 5341 /* set preallocation */ 5342 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5343 if (!newisdense) { 5344 PetscInt *new_local_nnz=0; 5345 5346 ptr_vals = recv_buffer_vals; 5347 ptr_idxs = recv_buffer_idxs_local; 5348 if (n_recvs) { 5349 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5350 } 5351 for (i=0;i<n_recvs;i++) { 5352 PetscInt j; 5353 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5354 for (j=0;j<*(ptr_idxs+1);j++) { 5355 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5356 } 5357 } else { 5358 /* TODO */ 5359 } 5360 ptr_idxs += olengths_idxs[i]; 5361 } 5362 if (new_local_nnz) { 5363 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5364 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5365 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5366 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5367 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5368 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5369 } else { 5370 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5371 } 5372 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5373 } else { 5374 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5375 } 5376 5377 /* set values */ 5378 ptr_vals = recv_buffer_vals; 5379 ptr_idxs = recv_buffer_idxs_local; 5380 for (i=0;i<n_recvs;i++) { 5381 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5382 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5383 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5384 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5385 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5386 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5387 } else { 5388 /* TODO */ 5389 } 5390 ptr_idxs += olengths_idxs[i]; 5391 ptr_vals += olengths_vals[i]; 5392 } 5393 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5394 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5395 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5396 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5397 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5398 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5399 5400 #if 0 5401 if (!restrict_comm) { /* check */ 5402 Vec lvec,rvec; 5403 PetscReal infty_error; 5404 5405 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5406 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5407 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5408 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5409 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5410 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5411 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5412 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5413 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5414 } 5415 #endif 5416 5417 /* assemble new additional is (if any) */ 5418 if (nis) { 5419 PetscInt **temp_idxs,*count_is,j,psum; 5420 5421 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5422 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5423 ptr_idxs = recv_buffer_idxs_is; 5424 psum = 0; 5425 for (i=0;i<n_recvs;i++) { 5426 for (j=0;j<nis;j++) { 5427 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5428 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5429 psum += plen; 5430 ptr_idxs += plen+1; /* shift pointer to received data */ 5431 } 5432 } 5433 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5434 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5435 for (i=1;i<nis;i++) { 5436 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5437 } 5438 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5439 ptr_idxs = recv_buffer_idxs_is; 5440 for (i=0;i<n_recvs;i++) { 5441 for (j=0;j<nis;j++) { 5442 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5443 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5444 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5445 ptr_idxs += plen+1; /* shift pointer to received data */ 5446 } 5447 } 5448 for (i=0;i<nis;i++) { 5449 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5450 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5451 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5452 } 5453 ierr = PetscFree(count_is);CHKERRQ(ierr); 5454 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5455 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5456 } 5457 /* free workspace */ 5458 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5459 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5460 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5461 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5462 if (isdense) { 5463 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5464 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5465 } else { 5466 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5467 } 5468 if (nis) { 5469 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5470 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5471 } 5472 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5473 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5474 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5475 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5476 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5477 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5478 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5479 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5480 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5481 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5482 ierr = PetscFree(onodes);CHKERRQ(ierr); 5483 if (nis) { 5484 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5485 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5486 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5487 } 5488 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5489 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5490 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5491 for (i=0;i<nis;i++) { 5492 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5493 } 5494 *mat_n = NULL; 5495 } 5496 PetscFunctionReturn(0); 5497 } 5498 5499 /* temporary hack into ksp private data structure */ 5500 #include <petsc/private/kspimpl.h> 5501 5502 #undef __FUNCT__ 5503 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5504 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5505 { 5506 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5507 PC_IS *pcis = (PC_IS*)pc->data; 5508 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5509 MatNullSpace CoarseNullSpace=NULL; 5510 ISLocalToGlobalMapping coarse_islg; 5511 IS coarse_is,*isarray; 5512 PetscInt i,im_active=-1,active_procs=-1; 5513 PetscInt nis,nisdofs,nisneu,nisvert; 5514 PC pc_temp; 5515 PCType coarse_pc_type; 5516 KSPType coarse_ksp_type; 5517 PetscBool multilevel_requested,multilevel_allowed; 5518 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5519 Mat t_coarse_mat_is; 5520 PetscInt ncoarse; 5521 PetscBool compute_vecs = PETSC_FALSE; 5522 PetscScalar *array; 5523 MatReuse coarse_mat_reuse; 5524 PetscBool restr, full_restr, have_void; 5525 PetscErrorCode ierr; 5526 5527 PetscFunctionBegin; 5528 /* Assign global numbering to coarse dofs */ 5529 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5530 PetscInt ocoarse_size; 5531 compute_vecs = PETSC_TRUE; 5532 ocoarse_size = pcbddc->coarse_size; 5533 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5534 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5535 /* see if we can avoid some work */ 5536 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5537 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5538 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5539 PC pc; 5540 PetscBool isbddc; 5541 5542 /* temporary workaround since PCBDDC does not have a reset method so far */ 5543 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5544 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5545 if (isbddc) { 5546 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5547 } else { 5548 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5549 } 5550 coarse_reuse = PETSC_FALSE; 5551 } else { /* we can safely reuse already computed coarse matrix */ 5552 coarse_reuse = PETSC_TRUE; 5553 } 5554 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5555 coarse_reuse = PETSC_FALSE; 5556 } 5557 /* reset any subassembling information */ 5558 if (!coarse_reuse || pcbddc->recompute_topography) { 5559 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5560 } 5561 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5562 coarse_reuse = PETSC_TRUE; 5563 } 5564 /* assemble coarse matrix */ 5565 if (coarse_reuse && pcbddc->coarse_ksp) { 5566 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5567 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5568 coarse_mat_reuse = MAT_REUSE_MATRIX; 5569 } else { 5570 coarse_mat = NULL; 5571 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5572 } 5573 5574 /* creates temporary l2gmap and IS for coarse indexes */ 5575 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5576 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5577 5578 /* creates temporary MATIS object for coarse matrix */ 5579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5580 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5581 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5582 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5583 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5584 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5585 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5586 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5587 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5588 5589 /* count "active" (i.e. with positive local size) and "void" processes */ 5590 im_active = !!(pcis->n); 5591 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5592 5593 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5594 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5595 /* full_restr : just use the receivers from the subassembling pattern */ 5596 coarse_mat_is = NULL; 5597 multilevel_allowed = PETSC_FALSE; 5598 multilevel_requested = PETSC_FALSE; 5599 full_restr = PETSC_TRUE; 5600 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5601 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5602 if (multilevel_requested) { 5603 ncoarse = active_procs/pcbddc->coarsening_ratio; 5604 restr = PETSC_FALSE; 5605 full_restr = PETSC_FALSE; 5606 } else { 5607 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5608 restr = PETSC_TRUE; 5609 full_restr = PETSC_TRUE; 5610 } 5611 ncoarse = PetscMax(1,ncoarse); 5612 if (!pcbddc->coarse_subassembling) { 5613 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5614 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5615 PetscInt psum; 5616 PetscMPIInt size; 5617 if (pcbddc->coarse_ksp) psum = 1; 5618 else psum = 0; 5619 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5620 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5621 if (ncoarse < size) have_void = PETSC_TRUE; 5622 } 5623 /* determine if we can go multilevel */ 5624 if (multilevel_requested) { 5625 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5626 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5627 } 5628 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5629 5630 /* dump subassembling pattern */ 5631 if (pcbddc->dbg_flag && multilevel_allowed) { 5632 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5633 } 5634 5635 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5636 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5637 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5638 const PetscInt *idxs; 5639 ISLocalToGlobalMapping tmap; 5640 5641 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5642 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5643 /* allocate space for temporary storage */ 5644 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5645 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5646 /* allocate for IS array */ 5647 nisdofs = pcbddc->n_ISForDofsLocal; 5648 nisneu = !!pcbddc->NeumannBoundariesLocal; 5649 nisvert = 0; /* nisvert is not used */ 5650 nis = nisdofs + nisneu + nisvert; 5651 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5652 /* dofs splitting */ 5653 for (i=0;i<nisdofs;i++) { 5654 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5655 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5656 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5657 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5658 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5659 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5660 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5661 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5662 } 5663 /* neumann boundaries */ 5664 if (pcbddc->NeumannBoundariesLocal) { 5665 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5666 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5667 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5668 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5669 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5670 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5671 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5672 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5673 } 5674 /* free memory */ 5675 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5676 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5677 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5678 } else { 5679 nis = 0; 5680 nisdofs = 0; 5681 nisneu = 0; 5682 nisvert = 0; 5683 isarray = NULL; 5684 } 5685 /* destroy no longer needed map */ 5686 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5687 5688 /* subassemble */ 5689 if (multilevel_allowed) { 5690 PetscBool reuse,reuser; 5691 if (coarse_mat) reuse = PETSC_TRUE; 5692 else reuse = PETSC_FALSE; 5693 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5694 if (reuser) { 5695 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5696 } else { 5697 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5698 } 5699 } else { 5700 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5701 } 5702 if (coarse_mat_is || coarse_mat) { 5703 PetscMPIInt size; 5704 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5705 if (!multilevel_allowed) { 5706 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5707 } else { 5708 Mat A; 5709 5710 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5711 if (coarse_mat_is) { 5712 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5713 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5714 coarse_mat = coarse_mat_is; 5715 } 5716 /* be sure we don't have MatSeqDENSE as local mat */ 5717 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5718 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5719 } 5720 } 5721 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5722 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5723 5724 /* create local to global scatters for coarse problem */ 5725 if (compute_vecs) { 5726 PetscInt lrows; 5727 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5728 if (coarse_mat) { 5729 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5730 } else { 5731 lrows = 0; 5732 } 5733 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5734 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5735 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5736 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5737 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5738 } 5739 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5740 5741 /* set defaults for coarse KSP and PC */ 5742 if (multilevel_allowed) { 5743 coarse_ksp_type = KSPRICHARDSON; 5744 coarse_pc_type = PCBDDC; 5745 } else { 5746 coarse_ksp_type = KSPPREONLY; 5747 coarse_pc_type = PCREDUNDANT; 5748 } 5749 5750 /* print some info if requested */ 5751 if (pcbddc->dbg_flag) { 5752 if (!multilevel_allowed) { 5753 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5754 if (multilevel_requested) { 5755 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5756 } else if (pcbddc->max_levels) { 5757 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5758 } 5759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5760 } 5761 } 5762 5763 /* create the coarse KSP object only once with defaults */ 5764 if (coarse_mat) { 5765 PetscViewer dbg_viewer = NULL; 5766 if (pcbddc->dbg_flag) { 5767 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5768 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5769 } 5770 if (!pcbddc->coarse_ksp) { 5771 char prefix[256],str_level[16]; 5772 size_t len; 5773 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5774 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5775 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5776 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5777 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5778 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5779 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5780 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5781 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5782 /* prefix */ 5783 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5784 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5785 if (!pcbddc->current_level) { 5786 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5787 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5788 } else { 5789 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5790 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5791 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5792 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5793 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5794 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5795 } 5796 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5797 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5798 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5799 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5800 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5801 /* allow user customization */ 5802 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5803 } 5804 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5805 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5806 if (nisdofs) { 5807 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5808 for (i=0;i<nisdofs;i++) { 5809 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5810 } 5811 } 5812 if (nisneu) { 5813 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5814 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5815 } 5816 if (nisvert) { 5817 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5818 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5819 } 5820 5821 /* get some info after set from options */ 5822 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5823 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5824 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5825 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5826 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5827 isbddc = PETSC_FALSE; 5828 } 5829 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5830 if (isredundant) { 5831 KSP inner_ksp; 5832 PC inner_pc; 5833 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5834 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5835 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5836 } 5837 5838 /* parameters which miss an API */ 5839 if (isbddc) { 5840 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5841 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5842 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5843 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5844 if (pcbddc_coarse->benign_saddle_point) { 5845 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5846 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5847 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5848 } 5849 } 5850 5851 /* propagate symmetry info of coarse matrix */ 5852 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5853 if (pc->pmat->symmetric_set) { 5854 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5855 } 5856 if (pc->pmat->hermitian_set) { 5857 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5858 } 5859 if (pc->pmat->spd_set) { 5860 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5861 } 5862 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5863 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5864 } 5865 /* set operators */ 5866 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5867 if (pcbddc->dbg_flag) { 5868 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5869 } 5870 } 5871 ierr = PetscFree(isarray);CHKERRQ(ierr); 5872 #if 0 5873 { 5874 PetscViewer viewer; 5875 char filename[256]; 5876 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5877 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5878 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5879 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5880 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5881 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5882 } 5883 #endif 5884 5885 /* Compute coarse null space (special handling by BDDC only) */ 5886 #if 0 5887 if (pcbddc->NullSpace) { 5888 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5889 } 5890 #endif 5891 /* hack */ 5892 if (pcbddc->coarse_ksp) { 5893 Vec crhs,csol; 5894 5895 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5896 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5897 if (!csol) { 5898 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5899 } 5900 if (!crhs) { 5901 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5902 } 5903 } 5904 5905 /* compute null space for coarse solver if the benign trick has been requested */ 5906 if (pcbddc->benign_null) { 5907 5908 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5909 for (i=0;i<pcbddc->benign_n;i++) { 5910 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5911 } 5912 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5913 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5914 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5915 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5916 if (coarse_mat) { 5917 Vec nullv; 5918 PetscScalar *array,*array2; 5919 PetscInt nl; 5920 5921 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5922 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5923 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5924 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5925 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5926 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5927 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5928 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5929 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5930 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5931 } 5932 } 5933 5934 if (pcbddc->coarse_ksp) { 5935 PetscBool ispreonly; 5936 5937 if (CoarseNullSpace) { 5938 PetscBool isnull; 5939 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5940 if (0) { 5941 if (isbddc && !pcbddc->benign_saddle_point) { 5942 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5943 } else { 5944 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5945 } 5946 } else { 5947 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5948 } 5949 } 5950 /* setup coarse ksp */ 5951 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5952 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5953 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5954 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5955 KSP check_ksp; 5956 KSPType check_ksp_type; 5957 PC check_pc; 5958 Vec check_vec,coarse_vec; 5959 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5960 PetscInt its; 5961 PetscBool compute_eigs; 5962 PetscReal *eigs_r,*eigs_c; 5963 PetscInt neigs; 5964 const char *prefix; 5965 5966 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5967 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5968 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5969 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5970 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5971 /* prevent from setup unneeded object */ 5972 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 5973 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 5974 if (ispreonly) { 5975 check_ksp_type = KSPPREONLY; 5976 compute_eigs = PETSC_FALSE; 5977 } else { 5978 check_ksp_type = KSPGMRES; 5979 compute_eigs = PETSC_TRUE; 5980 } 5981 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5982 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5983 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5984 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5985 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5986 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5987 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5988 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5989 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5990 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5991 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5992 /* create random vec */ 5993 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 5994 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5995 if (CoarseNullSpace) { 5996 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5997 } 5998 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5999 /* solve coarse problem */ 6000 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6001 if (CoarseNullSpace) { 6002 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 6003 } 6004 /* set eigenvalue estimation if preonly has not been requested */ 6005 if (compute_eigs) { 6006 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6007 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6008 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6009 lambda_max = eigs_r[neigs-1]; 6010 lambda_min = eigs_r[0]; 6011 if (pcbddc->use_coarse_estimates) { 6012 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6013 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6014 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6015 } 6016 } 6017 } 6018 6019 /* check coarse problem residual error */ 6020 if (pcbddc->dbg_flag) { 6021 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6022 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6023 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6024 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6025 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6026 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6027 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6028 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6029 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6030 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6031 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6032 if (CoarseNullSpace) { 6033 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6034 } 6035 if (compute_eigs) { 6036 PetscReal lambda_max_s,lambda_min_s; 6037 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6038 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6039 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6040 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6041 for (i=0;i<neigs;i++) { 6042 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6043 } 6044 } 6045 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6046 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6047 } 6048 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6049 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6050 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6051 if (compute_eigs) { 6052 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6053 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6054 } 6055 } 6056 } 6057 /* print additional info */ 6058 if (pcbddc->dbg_flag) { 6059 /* waits until all processes reaches this point */ 6060 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6061 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6063 } 6064 6065 /* free memory */ 6066 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6067 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6068 PetscFunctionReturn(0); 6069 } 6070 6071 #undef __FUNCT__ 6072 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6073 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6074 { 6075 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6076 PC_IS* pcis = (PC_IS*)pc->data; 6077 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6078 IS subset,subset_mult,subset_n; 6079 PetscInt local_size,coarse_size=0; 6080 PetscInt *local_primal_indices=NULL; 6081 const PetscInt *t_local_primal_indices; 6082 PetscErrorCode ierr; 6083 6084 PetscFunctionBegin; 6085 /* Compute global number of coarse dofs */ 6086 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6087 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6088 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6089 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6090 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6091 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6092 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6093 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6094 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6095 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6096 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6097 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6098 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6099 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6100 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6101 6102 /* check numbering */ 6103 if (pcbddc->dbg_flag) { 6104 PetscScalar coarsesum,*array,*array2; 6105 PetscInt i; 6106 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6107 6108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6111 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6112 /* counter */ 6113 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6114 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6115 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6116 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6117 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6118 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6119 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6120 for (i=0;i<pcbddc->local_primal_size;i++) { 6121 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6122 } 6123 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6124 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6125 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6126 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6127 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6128 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6129 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6130 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6131 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6132 for (i=0;i<pcis->n;i++) { 6133 if (array[i] != 0.0 && array[i] != array2[i]) { 6134 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6135 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6136 set_error = PETSC_TRUE; 6137 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6139 } 6140 } 6141 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6142 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6144 for (i=0;i<pcis->n;i++) { 6145 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6146 } 6147 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6148 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6149 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6150 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6151 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6153 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6154 PetscInt *gidxs; 6155 6156 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6157 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6160 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6161 for (i=0;i<pcbddc->local_primal_size;i++) { 6162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6163 } 6164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6165 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6166 } 6167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6168 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6169 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6170 } 6171 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6172 /* get back data */ 6173 *coarse_size_n = coarse_size; 6174 *local_primal_indices_n = local_primal_indices; 6175 PetscFunctionReturn(0); 6176 } 6177 6178 #undef __FUNCT__ 6179 #define __FUNCT__ "PCBDDCGlobalToLocal" 6180 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6181 { 6182 IS localis_t; 6183 PetscInt i,lsize,*idxs,n; 6184 PetscScalar *vals; 6185 PetscErrorCode ierr; 6186 6187 PetscFunctionBegin; 6188 /* get indices in local ordering exploiting local to global map */ 6189 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6190 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6191 for (i=0;i<lsize;i++) vals[i] = 1.0; 6192 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6193 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6194 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6195 if (idxs) { /* multilevel guard */ 6196 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6197 } 6198 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6199 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6200 ierr = PetscFree(vals);CHKERRQ(ierr); 6201 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6202 /* now compute set in local ordering */ 6203 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6204 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6205 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6206 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6207 for (i=0,lsize=0;i<n;i++) { 6208 if (PetscRealPart(vals[i]) > 0.5) { 6209 lsize++; 6210 } 6211 } 6212 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6213 for (i=0,lsize=0;i<n;i++) { 6214 if (PetscRealPart(vals[i]) > 0.5) { 6215 idxs[lsize++] = i; 6216 } 6217 } 6218 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6220 *localis = localis_t; 6221 PetscFunctionReturn(0); 6222 } 6223 6224 #undef __FUNCT__ 6225 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6226 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6227 { 6228 PC_IS *pcis=(PC_IS*)pc->data; 6229 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6230 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6231 Mat S_j; 6232 PetscInt *used_xadj,*used_adjncy; 6233 PetscBool free_used_adj; 6234 PetscErrorCode ierr; 6235 6236 PetscFunctionBegin; 6237 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6238 free_used_adj = PETSC_FALSE; 6239 if (pcbddc->sub_schurs_layers == -1) { 6240 used_xadj = NULL; 6241 used_adjncy = NULL; 6242 } else { 6243 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6244 used_xadj = pcbddc->mat_graph->xadj; 6245 used_adjncy = pcbddc->mat_graph->adjncy; 6246 } else if (pcbddc->computed_rowadj) { 6247 used_xadj = pcbddc->mat_graph->xadj; 6248 used_adjncy = pcbddc->mat_graph->adjncy; 6249 } else { 6250 PetscBool flg_row=PETSC_FALSE; 6251 const PetscInt *xadj,*adjncy; 6252 PetscInt nvtxs; 6253 6254 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6255 if (flg_row) { 6256 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6257 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6258 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6259 free_used_adj = PETSC_TRUE; 6260 } else { 6261 pcbddc->sub_schurs_layers = -1; 6262 used_xadj = NULL; 6263 used_adjncy = NULL; 6264 } 6265 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6266 } 6267 } 6268 6269 /* setup sub_schurs data */ 6270 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6271 if (!sub_schurs->schur_explicit) { 6272 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6273 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6274 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6275 } else { 6276 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6277 PetscBool isseqaij,need_change = PETSC_FALSE;; 6278 PetscInt benign_n; 6279 Mat change = NULL; 6280 Vec scaling = NULL; 6281 IS change_primal = NULL; 6282 6283 if (!pcbddc->use_vertices && reuse_solvers) { 6284 PetscInt n_vertices; 6285 6286 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6287 reuse_solvers = (PetscBool)!n_vertices; 6288 } 6289 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6290 if (!isseqaij) { 6291 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6292 if (matis->A == pcbddc->local_mat) { 6293 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6294 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6295 } else { 6296 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6297 } 6298 } 6299 if (!pcbddc->benign_change_explicit) { 6300 benign_n = pcbddc->benign_n; 6301 } else { 6302 benign_n = 0; 6303 } 6304 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6305 We need a global reduction to avoid possible deadlocks. 6306 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6307 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6308 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6309 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6310 need_change = (PetscBool)(!need_change); 6311 } 6312 /* If the user defines additional constraints, we import them here. 6313 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6314 if (need_change) { 6315 PC_IS *pcisf; 6316 PC_BDDC *pcbddcf; 6317 PC pcf; 6318 6319 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6320 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6321 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6322 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6323 /* hacks */ 6324 pcisf = (PC_IS*)pcf->data; 6325 pcisf->is_B_local = pcis->is_B_local; 6326 pcisf->vec1_N = pcis->vec1_N; 6327 pcisf->BtoNmap = pcis->BtoNmap; 6328 pcisf->n = pcis->n; 6329 pcisf->n_B = pcis->n_B; 6330 pcbddcf = (PC_BDDC*)pcf->data; 6331 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6332 pcbddcf->mat_graph = pcbddc->mat_graph; 6333 pcbddcf->use_faces = PETSC_TRUE; 6334 pcbddcf->use_change_of_basis = PETSC_TRUE; 6335 pcbddcf->use_change_on_faces = PETSC_TRUE; 6336 pcbddcf->use_qr_single = PETSC_TRUE; 6337 pcbddcf->fake_change = PETSC_TRUE; 6338 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6339 /* store information on primal vertices and change of basis (in local numbering) */ 6340 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6341 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6342 change = pcbddcf->ConstraintMatrix; 6343 pcbddcf->ConstraintMatrix = NULL; 6344 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6345 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6346 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6347 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6348 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6349 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6350 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6351 pcf->ops->destroy = NULL; 6352 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6353 } 6354 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6355 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6356 ierr = MatDestroy(&change);CHKERRQ(ierr); 6357 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6358 } 6359 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6360 6361 /* free adjacency */ 6362 if (free_used_adj) { 6363 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6364 } 6365 PetscFunctionReturn(0); 6366 } 6367 6368 #undef __FUNCT__ 6369 #define __FUNCT__ "PCBDDCInitSubSchurs" 6370 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6371 { 6372 PC_IS *pcis=(PC_IS*)pc->data; 6373 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6374 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6375 PCBDDCGraph graph; 6376 PetscErrorCode ierr; 6377 6378 PetscFunctionBegin; 6379 /* attach interface graph for determining subsets */ 6380 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6381 IS verticesIS,verticescomm; 6382 PetscInt vsize,*idxs; 6383 6384 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6385 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6386 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6387 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6388 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6389 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6390 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6391 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6392 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6393 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6394 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6395 } else { 6396 graph = pcbddc->mat_graph; 6397 } 6398 /* print some info */ 6399 if (pcbddc->dbg_flag) { 6400 IS vertices; 6401 PetscInt nv,nedges,nfaces; 6402 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6403 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6404 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6405 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6406 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6412 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6413 } 6414 6415 /* sub_schurs init */ 6416 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6417 6418 /* free graph struct */ 6419 if (pcbddc->sub_schurs_rebuild) { 6420 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6421 } 6422 PetscFunctionReturn(0); 6423 } 6424 6425 #undef __FUNCT__ 6426 #define __FUNCT__ "PCBDDCCheckOperator" 6427 PetscErrorCode PCBDDCCheckOperator(PC pc) 6428 { 6429 PC_IS *pcis=(PC_IS*)pc->data; 6430 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6431 PetscErrorCode ierr; 6432 6433 PetscFunctionBegin; 6434 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6435 IS zerodiag = NULL; 6436 Mat S_j,B0_B=NULL; 6437 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6438 PetscScalar *p0_check,*array,*array2; 6439 PetscReal norm; 6440 PetscInt i; 6441 6442 /* B0 and B0_B */ 6443 if (zerodiag) { 6444 IS dummy; 6445 6446 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6447 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6448 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6449 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6450 } 6451 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6452 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6453 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6454 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6455 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6456 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6457 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6458 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6459 /* S_j */ 6460 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6461 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6462 6463 /* mimic vector in \widetilde{W}_\Gamma */ 6464 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6465 /* continuous in primal space */ 6466 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6467 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6468 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6469 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6470 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6471 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6472 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6473 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6474 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6475 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6476 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6477 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6478 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6479 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6480 6481 /* assemble rhs for coarse problem */ 6482 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6483 /* local with Schur */ 6484 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6485 if (zerodiag) { 6486 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6487 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6488 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6489 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6490 } 6491 /* sum on primal nodes the local contributions */ 6492 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6493 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6494 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6495 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6496 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6497 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6498 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6499 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6500 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6501 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6502 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6503 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6504 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6505 /* scale primal nodes (BDDC sums contibutions) */ 6506 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6507 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6508 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6509 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6510 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6511 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6512 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6513 /* global: \widetilde{B0}_B w_\Gamma */ 6514 if (zerodiag) { 6515 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6516 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6517 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6518 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6519 } 6520 /* BDDC */ 6521 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6522 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6523 6524 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6525 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6526 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6527 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6528 for (i=0;i<pcbddc->benign_n;i++) { 6529 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6530 } 6531 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6532 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6533 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6534 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6535 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6536 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6537 } 6538 PetscFunctionReturn(0); 6539 } 6540