1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 9 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 10 { 11 PCBDDCBenignMatMult_ctx ctx; 12 PetscErrorCode ierr; 13 PetscBool apply_right,apply_left,reset_x; 14 15 PetscFunctionBegin; 16 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 17 if (transpose) { 18 apply_right = ctx->apply_left; 19 apply_left = ctx->apply_right; 20 } else { 21 apply_right = ctx->apply_right; 22 apply_left = ctx->apply_left; 23 } 24 reset_x = PETSC_FALSE; 25 if (apply_right) { 26 const PetscScalar *ax; 27 PetscInt nl,i; 28 29 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 31 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 32 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 33 for (i=0;i<ctx->benign_n;i++) { 34 PetscScalar sum,val; 35 const PetscInt *idxs; 36 PetscInt nz,j; 37 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 38 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 39 sum = 0.; 40 if (ctx->apply_p0) { 41 val = ctx->work[idxs[nz-1]]; 42 for (j=0;j<nz-1;j++) { 43 sum += ctx->work[idxs[j]]; 44 ctx->work[idxs[j]] += val; 45 } 46 } else { 47 for (j=0;j<nz-1;j++) { 48 sum += ctx->work[idxs[j]]; 49 } 50 } 51 ctx->work[idxs[nz-1]] -= sum; 52 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 53 } 54 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 55 reset_x = PETSC_TRUE; 56 } 57 if (transpose) { 58 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 59 } else { 60 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 61 } 62 if (reset_x) { 63 ierr = VecResetArray(x);CHKERRQ(ierr); 64 } 65 if (apply_left) { 66 PetscScalar *ay; 67 PetscInt i; 68 69 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 70 for (i=0;i<ctx->benign_n;i++) { 71 PetscScalar sum,val; 72 const PetscInt *idxs; 73 PetscInt nz,j; 74 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 75 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 76 val = -ay[idxs[nz-1]]; 77 if (ctx->apply_p0) { 78 sum = 0.; 79 for (j=0;j<nz-1;j++) { 80 sum += ay[idxs[j]]; 81 ay[idxs[j]] += val; 82 } 83 ay[idxs[nz-1]] += sum; 84 } else { 85 for (j=0;j<nz-1;j++) { 86 ay[idxs[j]] += val; 87 } 88 ay[idxs[nz-1]] = 0.; 89 } 90 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 91 } 92 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 99 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 100 { 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 110 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 111 { 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCBenignShellMat" 121 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 122 { 123 PC_IS *pcis = (PC_IS*)pc->data; 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PCBDDCBenignMatMult_ctx ctx; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!restore) { 130 Mat A_IB,A_BI; 131 PetscScalar *work; 132 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 133 134 if (pcbddc->benign_original_mat) { 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 136 } 137 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 138 PetscFunctionReturn(0); 139 } 140 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 141 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 142 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 143 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 146 ierr = PetscNew(&ctx);CHKERRQ(ierr); 147 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 148 ctx->apply_left = PETSC_TRUE; 149 ctx->apply_right = PETSC_FALSE; 150 ctx->apply_p0 = PETSC_FALSE; 151 ctx->benign_n = pcbddc->benign_n; 152 if (reuse) { 153 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 154 ctx->free = PETSC_FALSE; 155 } else { /* TODO: could be optimized for successive solves */ 156 ISLocalToGlobalMapping N_to_D; 157 PetscInt i; 158 159 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 160 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 161 for (i=0;i<pcbddc->benign_n;i++) { 162 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 163 } 164 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 165 ctx->free = PETSC_TRUE; 166 } 167 ctx->A = pcis->A_IB; 168 ctx->work = work; 169 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 170 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 pcis->A_IB = A_IB; 173 174 /* A_BI as A_IB^T */ 175 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 176 pcbddc->benign_original_mat = pcis->A_BI; 177 pcis->A_BI = A_BI; 178 } else { 179 if (!pcbddc->benign_original_mat) { 180 PetscFunctionReturn(0); 181 } 182 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 183 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 184 pcis->A_IB = ctx->A; 185 ctx->A = NULL; 186 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 187 pcis->A_BI = pcbddc->benign_original_mat; 188 pcbddc->benign_original_mat = NULL; 189 if (ctx->free) { 190 PetscInt i; 191 for (i=0;i<ctx->benign_n;i++) { 192 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 193 } 194 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 195 } 196 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 197 ierr = PetscFree(ctx);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* used just in bddc debug mode */ 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignProject" 205 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 206 { 207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 209 Mat An; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 214 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 215 if (is1) { 216 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 217 ierr = MatDestroy(&An);CHKERRQ(ierr); 218 } else { 219 *B = An; 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* TODO: add reuse flag */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSeqAIJCompress" 227 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 228 { 229 Mat Bt; 230 PetscScalar *a,*bdata; 231 const PetscInt *ii,*ij; 232 PetscInt m,n,i,nnz,*bii,*bij; 233 PetscBool flg_row; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 238 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 239 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 240 nnz = n; 241 for (i=0;i<ii[n];i++) { 242 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 243 } 244 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 247 nnz = 0; 248 bii[0] = 0; 249 for (i=0;i<n;i++) { 250 PetscInt j; 251 for (j=ii[i];j<ii[i+1];j++) { 252 PetscScalar entry = a[j]; 253 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 254 bij[nnz] = ij[j]; 255 bdata[nnz] = entry; 256 nnz++; 257 } 258 } 259 bii[i+1] = nnz; 260 } 261 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 262 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 263 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 264 { 265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 } 269 *B = Bt; 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatDetectDisconnectedComponents" 275 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 276 { 277 Mat B; 278 IS is_dummy,*cc_n; 279 ISLocalToGlobalMapping l2gmap_dummy; 280 PCBDDCGraph graph; 281 PetscInt i,n; 282 PetscInt *xadj,*adjncy; 283 PetscInt *xadj_filtered,*adjncy_filtered; 284 PetscBool flg_row,isseqaij; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 if (!A->rmap->N || !A->cmap->N) { 289 *ncc = 0; 290 *cc = NULL; 291 PetscFunctionReturn(0); 292 } 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 294 if (!isseqaij && filter) { 295 PetscBool isseqdense; 296 297 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 298 if (!isseqdense) { 299 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 300 } else { /* TODO: rectangular case and LDA */ 301 PetscScalar *array; 302 PetscReal chop=1.e-6; 303 304 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 305 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 306 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 307 for (i=0;i<n;i++) { 308 PetscInt j; 309 for (j=i+1;j<n;j++) { 310 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 311 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 312 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 313 } 314 } 315 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 316 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 317 } 318 } else { 319 B = A; 320 } 321 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 322 323 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 324 if (filter) { 325 PetscScalar *data; 326 PetscInt j,cum; 327 328 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 329 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 330 cum = 0; 331 for (i=0;i<n;i++) { 332 PetscInt t; 333 334 for (j=xadj[i];j<xadj[i+1];j++) { 335 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 336 continue; 337 } 338 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 339 } 340 t = xadj_filtered[i]; 341 xadj_filtered[i] = cum; 342 cum += t; 343 } 344 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 345 } else { 346 xadj_filtered = NULL; 347 adjncy_filtered = NULL; 348 } 349 350 /* compute local connected components using PCBDDCGraph */ 351 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 353 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 355 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 357 if (xadj_filtered) { 358 graph->xadj = xadj_filtered; 359 graph->adjncy = adjncy_filtered; 360 } else { 361 graph->xadj = xadj; 362 graph->adjncy = adjncy; 363 } 364 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 366 /* partial clean up */ 367 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 368 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 369 if (A != B) { 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 } 372 373 /* get back data */ 374 if (ncc) *ncc = graph->ncc; 375 if (cc) { 376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 377 for (i=0;i<graph->ncc;i++) { 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 379 } 380 *cc = cc_n; 381 } 382 /* clean up graph */ 383 graph->xadj = 0; 384 graph->adjncy = 0; 385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCBenignCheck" 391 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 392 { 393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 394 PC_IS* pcis = (PC_IS*)(pc->data); 395 IS dirIS = NULL; 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 401 if (zerodiag) { 402 Mat A; 403 Vec vec3_N; 404 PetscScalar *vals; 405 const PetscInt *idxs; 406 PetscInt nz,*count; 407 408 /* p0 */ 409 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 410 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 413 for (i=0;i<nz;i++) vals[i] = 1.; 414 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 417 /* v_I */ 418 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 419 for (i=0;i<nz;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 422 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 423 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 424 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 426 if (dirIS) { 427 PetscInt n; 428 429 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 430 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 431 for (i=0;i<n;i++) vals[i] = 0.; 432 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 438 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 439 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 440 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 441 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 442 if (PetscAbsScalar(vals[0]) > 1.e-1) { 443 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 444 } 445 ierr = PetscFree(vals);CHKERRQ(ierr); 446 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 447 448 /* there should not be any pressure dofs lying on the interface */ 449 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 450 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 451 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 452 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 453 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 for (i=0;i<nz;i++) { 455 if (count[idxs[i]]) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 457 } 458 } 459 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 460 ierr = PetscFree(count);CHKERRQ(ierr); 461 } 462 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 463 464 /* check PCBDDCBenignGetOrSetP0 */ 465 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 468 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 469 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 470 for (i=0;i<pcbddc->benign_n;i++) { 471 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 472 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 473 } 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 480 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 481 { 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 IS pressures,zerodiag,*zerodiag_subs; 484 PetscInt nz,n; 485 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 491 for (n=0;n<pcbddc->benign_n;n++) { 492 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 495 pcbddc->benign_n = 0; 496 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 497 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 498 Checks if all the pressure dofs in each subdomain have a zero diagonal 499 If not, a change of basis on pressures is not needed 500 since the local Schur complements are SPD 501 */ 502 has_null_pressures = PETSC_TRUE; 503 have_null = PETSC_TRUE; 504 if (pcbddc->n_ISForDofsLocal) { 505 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 506 507 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 513 if (!sorted) { 514 ierr = ISSort(pressures);CHKERRQ(ierr); 515 } 516 } else { 517 pressures = NULL; 518 } 519 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 520 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 521 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 522 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 523 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 524 if (!sorted) { 525 ierr = ISSort(zerodiag);CHKERRQ(ierr); 526 } 527 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 528 if (!nz) { 529 if (n) have_null = PETSC_FALSE; 530 has_null_pressures = PETSC_FALSE; 531 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 532 } 533 recompute_zerodiag = PETSC_FALSE; 534 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 535 zerodiag_subs = NULL; 536 pcbddc->benign_n = 0; 537 if (has_null_pressures) { 538 IS *subs; 539 PetscInt nsubs,i; 540 541 subs = pcbddc->local_subs; 542 nsubs = pcbddc->n_local_subs; 543 if (nsubs > 1) { 544 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 545 for (i=0;i<nsubs;i++) { 546 ISLocalToGlobalMapping l2g; 547 IS t_zerodiag_subs; 548 PetscInt nl; 549 550 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 551 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 552 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 553 if (nl) { 554 PetscBool valid = PETSC_TRUE; 555 556 if (pressures) { 557 IS t_pressure_subs; 558 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 559 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 560 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 561 } 562 if (valid) { 563 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 564 pcbddc->benign_n++; 565 } else { 566 recompute_zerodiag = PETSC_TRUE; 567 } 568 } 569 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 571 } 572 } else { /* there's just one subdomain (or zero if they have not been detected */ 573 PetscBool valid = PETSC_TRUE; 574 if (pressures) { 575 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 576 } 577 if (valid) { 578 pcbddc->benign_n = 1; 579 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 580 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 581 zerodiag_subs[0] = zerodiag; 582 } 583 } 584 } 585 586 if (!pcbddc->benign_n) { 587 PetscInt n; 588 589 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 590 recompute_zerodiag = PETSC_FALSE; 591 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 592 if (n) { 593 has_null_pressures = PETSC_FALSE; 594 have_null = PETSC_FALSE; 595 } 596 } 597 598 /* final check for null pressures */ 599 if (zerodiag && pressures) { 600 PetscInt nz,np; 601 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 602 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 603 if (nz != np) have_null = PETSC_FALSE; 604 } 605 606 if (recompute_zerodiag) { 607 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 608 if (pcbddc->benign_n == 1) { 609 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 610 zerodiag = zerodiag_subs[0]; 611 } else { 612 PetscInt i,nzn,*new_idxs; 613 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 nzn += ns; 619 } 620 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 621 nzn = 0; 622 for (i=0;i<pcbddc->benign_n;i++) { 623 PetscInt ns,*idxs; 624 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 625 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 626 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 627 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 628 nzn += ns; 629 } 630 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 631 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 632 } 633 have_null = PETSC_FALSE; 634 } 635 636 /* no-net-flux */ 637 if (pcbddc->benign_compute_nonetflux) { 638 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 639 MatNullSpace near_null_space; 640 ISLocalToGlobalMapping rmap; 641 Vec quad_vec; 642 PetscScalar *pvals; 643 PetscInt i,np,*dummyins; 644 645 /* create vector to hold quadrature weights */ 646 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 647 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 648 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 649 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 650 651 /* compute B^{(i)T} * 1_p */ 652 np = 0; 653 if (zerodiag) { 654 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 655 } else if (pressures) { 656 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 657 } 658 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 659 for (i=0;i<np;i++) pvals[i] = 1.; 660 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 661 if (np) { 662 const PetscInt *pidxs; 663 664 if (zerodiag) { 665 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 666 } else if (pressures) { 667 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 668 } 669 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 670 if (zerodiag) { 671 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 672 } else if (pressures) { 673 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 674 } 675 } 676 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 677 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 678 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 679 ierr = PetscFree(pvals);CHKERRQ(ierr); 680 681 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 682 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 683 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 684 for (i=0;i<matis->sf->nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 685 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 686 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 687 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 688 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 689 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 690 for (i=0;i<matis->sf->nleaves;i++) { 691 if (PetscGlobalRank != matis->sf_leafdata[i]) { 692 pvals[i] = 0.; 693 } 694 } 695 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 696 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 697 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 698 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 699 ierr = PetscFree(dummyins);CHKERRQ(ierr); 700 701 /* assembly quadrature vec and attach near null space to pmat */ 702 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 703 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 704 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 705 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 706 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 707 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 708 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 709 } 710 711 /* change of basis and p0 dofs */ 712 if (has_null_pressures) { 713 IS zerodiagc; 714 const PetscInt *idxs,*idxsc; 715 PetscInt i,s,*nnz; 716 717 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 718 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 719 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 720 /* local change of basis for pressures */ 721 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 722 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 723 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 724 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 725 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 726 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 727 for (i=0;i<pcbddc->benign_n;i++) { 728 PetscInt nzs,j; 729 730 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 731 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 732 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 733 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 734 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 735 } 736 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 737 ierr = PetscFree(nnz);CHKERRQ(ierr); 738 /* set identity on velocities */ 739 for (i=0;i<n-nz;i++) { 740 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 741 } 742 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 743 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 744 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 745 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 746 /* set change on pressures */ 747 for (s=0;s<pcbddc->benign_n;s++) { 748 PetscScalar *array; 749 PetscInt nzs; 750 751 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 752 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 753 for (i=0;i<nzs-1;i++) { 754 PetscScalar vals[2]; 755 PetscInt cols[2]; 756 757 cols[0] = idxs[i]; 758 cols[1] = idxs[nzs-1]; 759 vals[0] = 1.; 760 vals[1] = 1.; 761 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 762 } 763 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 764 for (i=0;i<nzs-1;i++) array[i] = -1.; 765 array[nzs-1] = 1.; 766 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 767 /* store local idxs for p0 */ 768 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 769 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 770 ierr = PetscFree(array);CHKERRQ(ierr); 771 } 772 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 /* project if needed */ 775 if (pcbddc->benign_change_explicit) { 776 Mat M; 777 778 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 780 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 781 ierr = MatDestroy(&M);CHKERRQ(ierr); 782 } 783 /* store global idxs for p0 */ 784 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 785 } 786 pcbddc->benign_zerodiag_subs = zerodiag_subs; 787 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 788 789 /* determines if the coarse solver will be singular or not */ 790 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 791 /* determines if the problem has subdomains with 0 pressure block */ 792 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 793 *zerodiaglocal = zerodiag; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 799 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 800 { 801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 802 PetscScalar *array; 803 PetscErrorCode ierr; 804 805 PetscFunctionBegin; 806 if (!pcbddc->benign_sf) { 807 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 808 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 809 } 810 if (get) { 811 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 813 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 814 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 815 } else { 816 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 817 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 818 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 819 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 826 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 827 { 828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 /* TODO: add error checking 833 - avoid nested pop (or push) calls. 834 - cannot push before pop. 835 - cannot call this if pcbddc->local_mat is NULL 836 */ 837 if (!pcbddc->benign_n) { 838 PetscFunctionReturn(0); 839 } 840 if (pop) { 841 if (pcbddc->benign_change_explicit) { 842 IS is_p0; 843 MatReuse reuse; 844 845 /* extract B_0 */ 846 reuse = MAT_INITIAL_MATRIX; 847 if (pcbddc->benign_B0) { 848 reuse = MAT_REUSE_MATRIX; 849 } 850 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 851 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 852 /* remove rows and cols from local problem */ 853 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 854 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 855 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 856 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 857 } else { 858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 859 PetscScalar *vals; 860 PetscInt i,n,*idxs_ins; 861 862 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 863 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 864 if (!pcbddc->benign_B0) { 865 PetscInt *nnz; 866 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 867 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 868 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 869 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 870 for (i=0;i<pcbddc->benign_n;i++) { 871 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 872 nnz[i] = n - nnz[i]; 873 } 874 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 875 ierr = PetscFree(nnz);CHKERRQ(ierr); 876 } 877 878 for (i=0;i<pcbddc->benign_n;i++) { 879 PetscScalar *array; 880 PetscInt *idxs,j,nz,cum; 881 882 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 883 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 884 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 885 for (j=0;j<nz;j++) vals[j] = 1.; 886 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 887 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 888 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 889 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 890 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 891 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 892 cum = 0; 893 for (j=0;j<n;j++) { 894 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 895 vals[cum] = array[j]; 896 idxs_ins[cum] = j; 897 cum++; 898 } 899 } 900 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 901 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 902 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 903 } 904 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 905 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 906 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 907 } 908 } else { /* push */ 909 if (pcbddc->benign_change_explicit) { 910 PetscInt i; 911 912 for (i=0;i<pcbddc->benign_n;i++) { 913 PetscScalar *B0_vals; 914 PetscInt *B0_cols,B0_ncol; 915 916 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 917 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 918 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 919 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 920 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 921 } 922 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 923 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924 } else { 925 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 926 } 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "PCBDDCAdaptiveSelection" 933 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 934 { 935 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 936 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 937 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 938 PetscBLASInt *B_iwork,*B_ifail; 939 PetscScalar *work,lwork; 940 PetscScalar *St,*S,*eigv; 941 PetscScalar *Sarray,*Starray; 942 PetscReal *eigs,thresh; 943 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 944 PetscBool allocated_S_St; 945 #if defined(PETSC_USE_COMPLEX) 946 PetscReal *rwork; 947 #endif 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 952 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 953 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 958 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 if (pcbddc->dbg_flag) { 962 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 963 } 964 965 /* max size of subsets */ 966 mss = 0; 967 for (i=0;i<sub_schurs->n_subs;i++) { 968 PetscInt subset_size; 969 970 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 971 mss = PetscMax(mss,subset_size); 972 } 973 974 /* min/max and threshold */ 975 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 976 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 977 nmax = PetscMax(nmin,nmax); 978 allocated_S_St = PETSC_FALSE; 979 if (nmin) { 980 allocated_S_St = PETSC_TRUE; 981 } 982 983 /* allocate lapack workspace */ 984 cum = cum2 = 0; 985 maxneigs = 0; 986 for (i=0;i<sub_schurs->n_subs;i++) { 987 PetscInt n,subset_size; 988 989 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 990 n = PetscMin(subset_size,nmax); 991 cum += subset_size; 992 cum2 += subset_size*n; 993 maxneigs = PetscMax(maxneigs,n); 994 } 995 if (mss) { 996 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 997 PetscBLASInt B_itype = 1; 998 PetscBLASInt B_N = mss; 999 PetscReal zero = 0.0; 1000 PetscReal eps = 0.0; /* dlamch? */ 1001 1002 B_lwork = -1; 1003 S = NULL; 1004 St = NULL; 1005 eigs = NULL; 1006 eigv = NULL; 1007 B_iwork = NULL; 1008 B_ifail = NULL; 1009 #if defined(PETSC_USE_COMPLEX) 1010 rwork = NULL; 1011 #endif 1012 thresh = 1.0; 1013 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1014 #if defined(PETSC_USE_COMPLEX) 1015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1016 #else 1017 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1018 #endif 1019 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1021 } else { 1022 /* TODO */ 1023 } 1024 } else { 1025 lwork = 0; 1026 } 1027 1028 nv = 0; 1029 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1030 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1031 } 1032 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1033 if (allocated_S_St) { 1034 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1035 } 1036 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1037 #if defined(PETSC_USE_COMPLEX) 1038 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1039 #endif 1040 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1041 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1042 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1043 nv+cum,&pcbddc->adaptive_constraints_idxs, 1044 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1045 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1046 1047 maxneigs = 0; 1048 cum = cumarray = 0; 1049 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1050 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1051 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1052 const PetscInt *idxs; 1053 1054 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1055 for (cum=0;cum<nv;cum++) { 1056 pcbddc->adaptive_constraints_n[cum] = 1; 1057 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1058 pcbddc->adaptive_constraints_data[cum] = 1.0; 1059 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1060 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1061 } 1062 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1063 } 1064 1065 if (mss) { /* multilevel */ 1066 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1067 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1068 } 1069 1070 thresh = pcbddc->adaptive_threshold; 1071 for (i=0;i<sub_schurs->n_subs;i++) { 1072 const PetscInt *idxs; 1073 PetscReal upper,lower; 1074 PetscInt j,subset_size,eigs_start = 0; 1075 PetscBLASInt B_N; 1076 PetscBool same_data = PETSC_FALSE; 1077 1078 if (pcbddc->use_deluxe_scaling) { 1079 upper = PETSC_MAX_REAL; 1080 lower = thresh; 1081 } else { 1082 upper = 1./thresh; 1083 lower = 0.; 1084 } 1085 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1086 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1087 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1088 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1089 if (sub_schurs->is_hermitian) { 1090 PetscInt j,k; 1091 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1092 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1093 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 } 1095 for (j=0;j<subset_size;j++) { 1096 for (k=j;k<subset_size;k++) { 1097 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1098 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1099 } 1100 } 1101 } else { 1102 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1103 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1104 } 1105 } else { 1106 S = Sarray + cumarray; 1107 St = Starray + cumarray; 1108 } 1109 /* see if we can save some work */ 1110 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1111 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1112 } 1113 1114 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1115 B_neigs = 0; 1116 } else { 1117 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1118 PetscBLASInt B_itype = 1; 1119 PetscBLASInt B_IL, B_IU; 1120 PetscReal eps = -1.0; /* dlamch? */ 1121 PetscInt nmin_s; 1122 PetscBool compute_range = PETSC_FALSE; 1123 1124 if (pcbddc->dbg_flag) { 1125 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1126 } 1127 1128 compute_range = PETSC_FALSE; 1129 if (thresh > 1.+PETSC_SMALL && !same_data) { 1130 compute_range = PETSC_TRUE; 1131 } 1132 1133 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1134 if (compute_range) { 1135 1136 /* ask for eigenvalues larger than thresh */ 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1139 #else 1140 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1141 #endif 1142 } else if (!same_data) { 1143 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1144 B_IL = 1; 1145 #if defined(PETSC_USE_COMPLEX) 1146 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1147 #else 1148 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1149 #endif 1150 } else { /* same_data is true, so get the adaptive function requested by the user */ 1151 PetscInt k; 1152 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1153 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1154 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1155 nmin = nmax; 1156 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1157 for (k=0;k<nmax;k++) { 1158 eigs[k] = 1./PETSC_SMALL; 1159 eigv[k*(subset_size+1)] = 1.0; 1160 } 1161 } 1162 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1163 if (B_ierr) { 1164 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1165 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1166 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1167 } 1168 1169 if (B_neigs > nmax) { 1170 if (pcbddc->dbg_flag) { 1171 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1172 } 1173 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1174 B_neigs = nmax; 1175 } 1176 1177 nmin_s = PetscMin(nmin,B_N); 1178 if (B_neigs < nmin_s) { 1179 PetscBLASInt B_neigs2; 1180 1181 if (pcbddc->use_deluxe_scaling) { 1182 B_IL = B_N - nmin_s + 1; 1183 B_IU = B_N - B_neigs; 1184 } else { 1185 B_IL = B_neigs + 1; 1186 B_IU = nmin_s; 1187 } 1188 if (pcbddc->dbg_flag) { 1189 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1190 } 1191 if (sub_schurs->is_hermitian) { 1192 PetscInt j; 1193 for (j=0;j<subset_size;j++) { 1194 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1195 } 1196 for (j=0;j<subset_size;j++) { 1197 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1198 } 1199 } else { 1200 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1201 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1202 } 1203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_COMPLEX) 1205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1206 #else 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1208 #endif 1209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1210 B_neigs += B_neigs2; 1211 } 1212 if (B_ierr) { 1213 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1214 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1215 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1216 } 1217 if (pcbddc->dbg_flag) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1219 for (j=0;j<B_neigs;j++) { 1220 if (eigs[j] == 0.0) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1222 } else { 1223 if (pcbddc->use_deluxe_scaling) { 1224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1225 } else { 1226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 } else { 1232 /* TODO */ 1233 } 1234 } 1235 /* change the basis back to the original one */ 1236 if (sub_schurs->change) { 1237 Mat change,phi,phit; 1238 1239 if (pcbddc->dbg_flag > 1) { 1240 PetscInt ii; 1241 for (ii=0;ii<B_neigs;ii++) { 1242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1243 for (j=0;j<B_N;j++) { 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1245 } 1246 } 1247 } 1248 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1250 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1251 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1252 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1253 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1254 } 1255 maxneigs = PetscMax(B_neigs,maxneigs); 1256 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1257 if (B_neigs) { 1258 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1259 1260 if (pcbddc->dbg_flag > 1) { 1261 PetscInt ii; 1262 for (ii=0;ii<B_neigs;ii++) { 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1264 for (j=0;j<B_N;j++) { 1265 #if defined(PETSC_USE_COMPLEX) 1266 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1267 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1269 #else 1270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1271 #endif 1272 } 1273 } 1274 } 1275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1276 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1277 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1278 cum++; 1279 } 1280 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1281 /* shift for next computation */ 1282 cumarray += subset_size*subset_size; 1283 } 1284 if (pcbddc->dbg_flag) { 1285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1286 } 1287 1288 if (mss) { 1289 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1290 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1291 /* destroy matrices (junk) */ 1292 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1293 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1294 } 1295 if (allocated_S_St) { 1296 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1299 #if defined(PETSC_USE_COMPLEX) 1300 ierr = PetscFree(rwork);CHKERRQ(ierr); 1301 #endif 1302 if (pcbddc->dbg_flag) { 1303 PetscInt maxneigs_r; 1304 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "PCBDDCSetUpSolvers" 1312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1313 { 1314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1315 PetscScalar *coarse_submat_vals; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1322 1323 /* Setup local neumann solver ksp_R */ 1324 /* PCBDDCSetUpLocalScatters should be called first! */ 1325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1326 1327 /* Change global null space passed in by the user if change of basis has been requested */ 1328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1330 } 1331 1332 /* 1333 Setup local correction and local part of coarse basis. 1334 Gives back the dense local part of the coarse matrix in column major ordering 1335 */ 1336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1337 1338 /* Compute total number of coarse nodes and setup coarse solver */ 1339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1340 1341 /* free */ 1342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "PCBDDCResetCustomization" 1348 PetscErrorCode PCBDDCResetCustomization(PC pc) 1349 { 1350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1357 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1358 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1359 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1360 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1361 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1362 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1363 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1364 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1365 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "PCBDDCResetTopography" 1371 PetscErrorCode PCBDDCResetTopography(PC pc) 1372 { 1373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1374 PetscInt i; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1380 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1381 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1382 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1384 for (i=0;i<pcbddc->n_local_subs;i++) { 1385 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1388 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "PCBDDCResetSolvers" 1394 PetscErrorCode PCBDDCResetSolvers(PC pc) 1395 { 1396 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1401 if (pcbddc->coarse_phi_B) { 1402 PetscScalar *array; 1403 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1404 ierr = PetscFree(array);CHKERRQ(ierr); 1405 } 1406 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1407 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1410 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1412 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1413 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1414 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1415 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1416 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1417 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1418 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1419 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1420 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1421 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1422 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1423 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1424 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1425 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1426 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1427 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1429 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1430 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1431 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1432 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1433 if (pcbddc->benign_zerodiag_subs) { 1434 PetscInt i; 1435 for (i=0;i<pcbddc->benign_n;i++) { 1436 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1437 } 1438 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1439 } 1440 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1446 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1447 { 1448 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1449 PC_IS *pcis = (PC_IS*)pc->data; 1450 VecType impVecType; 1451 PetscInt n_constraints,n_R,old_size; 1452 PetscErrorCode ierr; 1453 1454 PetscFunctionBegin; 1455 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1456 /* get sizes */ 1457 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1458 n_R = pcis->n - pcbddc->n_vertices; 1459 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1460 /* local work vectors (try to avoid unneeded work)*/ 1461 /* R nodes */ 1462 old_size = -1; 1463 if (pcbddc->vec1_R) { 1464 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1465 } 1466 if (n_R != old_size) { 1467 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1468 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1469 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1470 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1471 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1472 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1473 } 1474 /* local primal dofs */ 1475 old_size = -1; 1476 if (pcbddc->vec1_P) { 1477 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1478 } 1479 if (pcbddc->local_primal_size != old_size) { 1480 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1481 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1482 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1483 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1484 } 1485 /* local explicit constraints */ 1486 old_size = -1; 1487 if (pcbddc->vec1_C) { 1488 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1489 } 1490 if (n_constraints && n_constraints != old_size) { 1491 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1493 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1494 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "PCBDDCSetUpCorrection" 1501 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1502 { 1503 PetscErrorCode ierr; 1504 /* pointers to pcis and pcbddc */ 1505 PC_IS* pcis = (PC_IS*)pc->data; 1506 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1507 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1508 /* submatrices of local problem */ 1509 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1510 /* submatrices of local coarse problem */ 1511 Mat S_VV,S_CV,S_VC,S_CC; 1512 /* working matrices */ 1513 Mat C_CR; 1514 /* additional working stuff */ 1515 PC pc_R; 1516 Mat F; 1517 Vec dummy_vec; 1518 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1519 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1520 PetscScalar *work; 1521 PetscInt *idx_V_B; 1522 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1523 PetscInt i,n_R,n_D,n_B; 1524 1525 /* some shortcuts to scalars */ 1526 PetscScalar one=1.0,m_one=-1.0; 1527 1528 PetscFunctionBegin; 1529 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1531 } 1532 1533 /* Set Non-overlapping dimensions */ 1534 n_vertices = pcbddc->n_vertices; 1535 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1536 n_B = pcis->n_B; 1537 n_D = pcis->n - n_B; 1538 n_R = pcis->n - n_vertices; 1539 1540 /* vertices in boundary numbering */ 1541 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1542 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1543 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1544 1545 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1546 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1548 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1550 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1552 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1554 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1555 1556 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1557 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1560 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1561 lda_rhs = n_R; 1562 need_benign_correction = PETSC_FALSE; 1563 if (isLU || isILU || isCHOL) { 1564 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1565 } else if (sub_schurs->reuse_solver) { 1566 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1567 MatFactorType type; 1568 1569 F = reuse_solver->F; 1570 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1571 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1572 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1573 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1574 } else { 1575 F = NULL; 1576 } 1577 1578 /* allocate workspace */ 1579 n = 0; 1580 if (n_constraints) { 1581 n += lda_rhs*n_constraints; 1582 } 1583 if (n_vertices) { 1584 n = PetscMax(2*lda_rhs*n_vertices,n); 1585 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1586 } 1587 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1588 1589 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1590 dummy_vec = NULL; 1591 if (need_benign_correction && lda_rhs != n_R && F) { 1592 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1593 } 1594 1595 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1596 if (n_constraints) { 1597 Mat M1,M2,M3,C_B; 1598 IS is_aux; 1599 PetscScalar *array,*array2; 1600 1601 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1602 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1603 1604 /* Extract constraints on R nodes: C_{CR} */ 1605 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1607 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1608 1609 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1610 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1611 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1612 for (i=0;i<n_constraints;i++) { 1613 const PetscScalar *row_cmat_values; 1614 const PetscInt *row_cmat_indices; 1615 PetscInt size_of_constraint,j; 1616 1617 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1618 for (j=0;j<size_of_constraint;j++) { 1619 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1620 } 1621 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1622 } 1623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1624 if (F) { 1625 Mat B; 1626 1627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1628 if (need_benign_correction) { 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 /* rhs is already zero on interior dofs, no need to change the rhs */ 1632 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1633 } 1634 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1635 if (need_benign_correction) { 1636 PetscScalar *marr; 1637 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1638 1639 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1640 if (lda_rhs != n_R) { 1641 for (i=0;i<n_constraints;i++) { 1642 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1643 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1644 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1645 } 1646 } else { 1647 for (i=0;i<n_constraints;i++) { 1648 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1649 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1651 } 1652 } 1653 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1654 } 1655 ierr = MatDestroy(&B);CHKERRQ(ierr); 1656 } else { 1657 PetscScalar *marr; 1658 1659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 for (i=0;i<n_constraints;i++) { 1661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1666 } 1667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1668 } 1669 if (!pcbddc->switch_static) { 1670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1673 for (i=0;i<n_constraints;i++) { 1674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1680 } 1681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1683 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1684 } else { 1685 if (lda_rhs != n_R) { 1686 IS dummy; 1687 1688 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1689 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1690 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1691 } else { 1692 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1693 pcbddc->local_auxmat2 = local_auxmat2_R; 1694 } 1695 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1696 } 1697 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1698 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1699 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1700 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1701 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1702 if (isCHOL) { 1703 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1704 } else { 1705 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1706 } 1707 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1708 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1709 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1710 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1711 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1712 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1713 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1714 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1715 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1716 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1717 } 1718 1719 /* Get submatrices from subdomain matrix */ 1720 if (n_vertices) { 1721 IS is_aux; 1722 1723 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1724 IS tis; 1725 1726 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1727 ierr = ISSort(tis);CHKERRQ(ierr); 1728 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1730 } else { 1731 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1732 } 1733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1735 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1736 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1737 } 1738 1739 /* Matrix of coarse basis functions (local) */ 1740 if (pcbddc->coarse_phi_B) { 1741 PetscInt on_B,on_primal,on_D=n_D; 1742 if (pcbddc->coarse_phi_D) { 1743 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1744 } 1745 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1746 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1747 PetscScalar *marray; 1748 1749 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1750 ierr = PetscFree(marray);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1752 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1753 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1754 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1755 } 1756 } 1757 1758 if (!pcbddc->coarse_phi_B) { 1759 PetscScalar *marray; 1760 1761 n = n_B*pcbddc->local_primal_size; 1762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1763 n += n_D*pcbddc->local_primal_size; 1764 } 1765 if (!pcbddc->symmetric_primal) { 1766 n *= 2; 1767 } 1768 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1770 n = n_B*pcbddc->local_primal_size; 1771 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1773 n += n_D*pcbddc->local_primal_size; 1774 } 1775 if (!pcbddc->symmetric_primal) { 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1777 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1778 n = n_B*pcbddc->local_primal_size; 1779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1780 } 1781 } else { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1784 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1786 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1787 } 1788 } 1789 } 1790 1791 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1792 p0_lidx_I = NULL; 1793 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1794 const PetscInt *idxs; 1795 1796 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1797 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1798 for (i=0;i<pcbddc->benign_n;i++) { 1799 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1800 } 1801 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1802 } 1803 1804 /* vertices */ 1805 if (n_vertices) { 1806 1807 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1808 1809 if (n_R) { 1810 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1811 PetscBLASInt B_N,B_one = 1; 1812 PetscScalar *x,*y; 1813 PetscBool isseqaij; 1814 1815 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1816 if (need_benign_correction) { 1817 ISLocalToGlobalMapping RtoN; 1818 IS is_p0; 1819 PetscInt *idxs_p0,n; 1820 1821 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1822 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1823 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1824 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1825 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1826 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1827 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1828 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1829 } 1830 1831 if (lda_rhs == n_R) { 1832 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1833 } else { 1834 PetscScalar *av,*array; 1835 const PetscInt *xadj,*adjncy; 1836 PetscInt n; 1837 PetscBool flg_row; 1838 1839 array = work+lda_rhs*n_vertices; 1840 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1841 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1842 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1844 for (i=0;i<n;i++) { 1845 PetscInt j; 1846 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1847 } 1848 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1849 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1851 } 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1853 if (need_benign_correction) { 1854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1855 PetscScalar *marr; 1856 1857 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1858 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1859 1860 | 0 0 0 | (V) 1861 L = | 0 0 -1 | (P-p0) 1862 | 0 0 -1 | (p0) 1863 1864 */ 1865 for (i=0;i<reuse_solver->benign_n;i++) { 1866 const PetscScalar *vals; 1867 const PetscInt *idxs,*idxs_zero; 1868 PetscInt n,j,nz; 1869 1870 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1871 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1872 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 for (j=0;j<n;j++) { 1874 PetscScalar val = vals[j]; 1875 PetscInt k,col = idxs[j]; 1876 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1877 } 1878 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1879 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1880 } 1881 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1882 } 1883 if (F) { 1884 /* need to correct the rhs */ 1885 if (need_benign_correction) { 1886 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1887 PetscScalar *marr; 1888 1889 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1890 if (lda_rhs != n_R) { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1895 } 1896 } else { 1897 for (i=0;i<n_vertices;i++) { 1898 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1899 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1901 } 1902 } 1903 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1904 } 1905 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1906 /* need to correct the solution */ 1907 if (need_benign_correction) { 1908 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1909 PetscScalar *marr; 1910 1911 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1912 if (lda_rhs != n_R) { 1913 for (i=0;i<n_vertices;i++) { 1914 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1915 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1917 } 1918 } else { 1919 for (i=0;i<n_vertices;i++) { 1920 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1921 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1923 } 1924 } 1925 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1926 } 1927 } else { 1928 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1929 for (i=0;i<n_vertices;i++) { 1930 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1931 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1932 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1934 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1935 } 1936 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1937 } 1938 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1939 /* S_VV and S_CV */ 1940 if (n_constraints) { 1941 Mat B; 1942 1943 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1944 for (i=0;i<n_vertices;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1947 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1948 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1951 } 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1956 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1957 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1960 ierr = MatDestroy(&B);CHKERRQ(ierr); 1961 } 1962 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1963 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1964 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1965 } 1966 if (lda_rhs != n_R) { 1967 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1969 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1970 } 1971 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1972 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1973 if (need_benign_correction) { 1974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1975 PetscScalar *marr,*sums; 1976 1977 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1978 ierr = MatDenseGetArray(S_VVt,&marr); 1979 for (i=0;i<reuse_solver->benign_n;i++) { 1980 const PetscScalar *vals; 1981 const PetscInt *idxs,*idxs_zero; 1982 PetscInt n,j,nz; 1983 1984 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1985 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1986 for (j=0;j<n_vertices;j++) { 1987 PetscInt k; 1988 sums[j] = 0.; 1989 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1990 } 1991 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1992 for (j=0;j<n;j++) { 1993 PetscScalar val = vals[j]; 1994 PetscInt k; 1995 for (k=0;k<n_vertices;k++) { 1996 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1997 } 1998 } 1999 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2000 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2001 } 2002 ierr = PetscFree(sums);CHKERRQ(ierr); 2003 ierr = MatDenseRestoreArray(S_VVt,&marr); 2004 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2005 } 2006 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2008 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2011 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2012 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2013 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2015 } else { 2016 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2017 } 2018 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2019 2020 /* coarse basis functions */ 2021 for (i=0;i<n_vertices;i++) { 2022 PetscScalar *y; 2023 2024 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2025 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2026 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2027 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2028 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2029 y[n_B*i+idx_V_B[i]] = 1.0; 2030 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2031 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2032 2033 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2034 PetscInt j; 2035 2036 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2037 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2038 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2039 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2040 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2041 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2042 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2043 } 2044 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2045 } 2046 /* if n_R == 0 the object is not destroyed */ 2047 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2048 } 2049 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2050 2051 if (n_constraints) { 2052 Mat B; 2053 2054 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2055 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2056 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2057 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2058 if (n_vertices) { 2059 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2060 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2061 } else { 2062 Mat S_VCt; 2063 2064 if (lda_rhs != n_R) { 2065 ierr = MatDestroy(&B);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2067 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2068 } 2069 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2070 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2071 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2072 } 2073 } 2074 ierr = MatDestroy(&B);CHKERRQ(ierr); 2075 /* coarse basis functions */ 2076 for (i=0;i<n_constraints;i++) { 2077 PetscScalar *y; 2078 2079 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2086 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2087 PetscInt j; 2088 2089 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2090 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2091 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2092 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2094 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2095 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2096 } 2097 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2098 } 2099 } 2100 if (n_constraints) { 2101 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2102 } 2103 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2104 2105 /* coarse matrix entries relative to B_0 */ 2106 if (pcbddc->benign_n) { 2107 Mat B0_B,B0_BPHI; 2108 IS is_dummy; 2109 PetscScalar *data; 2110 PetscInt j; 2111 2112 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2113 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2114 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2115 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2116 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2117 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2118 for (j=0;j<pcbddc->benign_n;j++) { 2119 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2120 for (i=0;i<pcbddc->local_primal_size;i++) { 2121 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2122 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2123 } 2124 } 2125 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2126 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2127 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2128 } 2129 2130 /* compute other basis functions for non-symmetric problems */ 2131 if (!pcbddc->symmetric_primal) { 2132 Mat B_V=NULL,B_C=NULL; 2133 PetscScalar *marray; 2134 2135 if (n_constraints) { 2136 Mat S_CCT,C_CRT; 2137 2138 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2139 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2140 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2141 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2142 if (n_vertices) { 2143 Mat S_VCT; 2144 2145 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2146 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2147 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2148 } 2149 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2150 } 2151 if (n_vertices && n_R) { 2152 PetscScalar *av,*marray; 2153 const PetscInt *xadj,*adjncy; 2154 PetscInt n; 2155 PetscBool flg_row; 2156 2157 /* B_V = B_V - A_VR^T */ 2158 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2159 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2161 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2162 for (i=0;i<n;i++) { 2163 PetscInt j; 2164 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2165 } 2166 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2167 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2168 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2169 } 2170 2171 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2172 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2173 for (i=0;i<n_vertices;i++) { 2174 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2175 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2176 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2177 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2179 } 2180 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2181 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2182 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2183 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2185 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2186 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2188 } 2189 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2190 /* coarse basis functions */ 2191 for (i=0;i<pcbddc->local_primal_size;i++) { 2192 PetscScalar *y; 2193 2194 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2195 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2196 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2197 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2198 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2199 if (i<n_vertices) { 2200 y[n_B*i+idx_V_B[i]] = 1.0; 2201 } 2202 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2204 2205 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2206 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2207 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2208 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2209 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2210 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2211 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2212 } 2213 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2214 } 2215 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2216 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2217 } 2218 /* free memory */ 2219 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2221 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2222 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2223 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2224 ierr = PetscFree(work);CHKERRQ(ierr); 2225 if (n_vertices) { 2226 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2227 } 2228 if (n_constraints) { 2229 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2230 } 2231 /* Checking coarse_sub_mat and coarse basis functios */ 2232 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2233 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2234 if (pcbddc->dbg_flag) { 2235 Mat coarse_sub_mat; 2236 Mat AUXMAT,TM1,TM2,TM3,TM4; 2237 Mat coarse_phi_D,coarse_phi_B; 2238 Mat coarse_psi_D,coarse_psi_B; 2239 Mat A_II,A_BB,A_IB,A_BI; 2240 Mat C_B,CPHI; 2241 IS is_dummy; 2242 Vec mones; 2243 MatType checkmattype=MATSEQAIJ; 2244 PetscReal real_value; 2245 2246 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2247 Mat A; 2248 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2250 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2251 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2252 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2253 ierr = MatDestroy(&A);CHKERRQ(ierr); 2254 } else { 2255 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2256 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2257 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2258 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2259 } 2260 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2262 if (!pcbddc->symmetric_primal) { 2263 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2264 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2265 } 2266 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2267 2268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2271 if (!pcbddc->symmetric_primal) { 2272 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2283 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2284 } else { 2285 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2286 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2291 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2292 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2293 } 2294 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2295 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2296 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2297 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2298 if (pcbddc->benign_n) { 2299 Mat B0_B,B0_BPHI; 2300 PetscScalar *data,*data2; 2301 PetscInt j; 2302 2303 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2304 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2305 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2306 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2307 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2308 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2309 for (j=0;j<pcbddc->benign_n;j++) { 2310 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2311 for (i=0;i<pcbddc->local_primal_size;i++) { 2312 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2313 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2314 } 2315 } 2316 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2317 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2318 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2319 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2320 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2321 } 2322 #if 0 2323 { 2324 PetscViewer viewer; 2325 char filename[256]; 2326 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2327 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2328 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2329 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2330 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2331 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2332 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2333 if (save_change) { 2334 Mat phi_B; 2335 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2337 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2338 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2339 } else { 2340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2342 } 2343 if (pcbddc->coarse_phi_D) { 2344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2346 } 2347 if (pcbddc->coarse_psi_B) { 2348 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2349 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2350 } 2351 if (pcbddc->coarse_psi_D) { 2352 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2353 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2354 } 2355 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2356 } 2357 #endif 2358 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2359 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2360 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2362 2363 /* check constraints */ 2364 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2365 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2366 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2367 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2368 } else { 2369 PetscScalar *data; 2370 Mat tmat; 2371 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2372 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2373 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2374 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2375 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2376 } 2377 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2378 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2379 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2380 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2382 if (!pcbddc->symmetric_primal) { 2383 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2384 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2385 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2386 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2388 } 2389 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2390 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2391 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2392 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2395 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2396 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2397 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2399 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2400 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2401 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2402 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2404 if (!pcbddc->symmetric_primal) { 2405 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2406 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2407 } 2408 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2409 } 2410 /* get back data */ 2411 *coarse_submat_vals_n = coarse_submat_vals; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2417 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2418 { 2419 Mat *work_mat; 2420 IS isrow_s,iscol_s; 2421 PetscBool rsorted,csorted; 2422 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2427 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2428 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2429 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2430 2431 if (!rsorted) { 2432 const PetscInt *idxs; 2433 PetscInt *idxs_sorted,i; 2434 2435 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2436 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2437 for (i=0;i<rsize;i++) { 2438 idxs_perm_r[i] = i; 2439 } 2440 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2441 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2442 for (i=0;i<rsize;i++) { 2443 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2444 } 2445 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2447 } else { 2448 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2449 isrow_s = isrow; 2450 } 2451 2452 if (!csorted) { 2453 if (isrow == iscol) { 2454 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2455 iscol_s = isrow_s; 2456 } else { 2457 const PetscInt *idxs; 2458 PetscInt *idxs_sorted,i; 2459 2460 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2461 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2462 for (i=0;i<csize;i++) { 2463 idxs_perm_c[i] = i; 2464 } 2465 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2466 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2467 for (i=0;i<csize;i++) { 2468 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2469 } 2470 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2471 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2472 } 2473 } else { 2474 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2475 iscol_s = iscol; 2476 } 2477 2478 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2479 2480 if (!rsorted || !csorted) { 2481 Mat new_mat; 2482 IS is_perm_r,is_perm_c; 2483 2484 if (!rsorted) { 2485 PetscInt *idxs_r,i; 2486 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2487 for (i=0;i<rsize;i++) { 2488 idxs_r[idxs_perm_r[i]] = i; 2489 } 2490 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2491 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2492 } else { 2493 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2494 } 2495 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2496 2497 if (!csorted) { 2498 if (isrow_s == iscol_s) { 2499 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2500 is_perm_c = is_perm_r; 2501 } else { 2502 PetscInt *idxs_c,i; 2503 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2504 for (i=0;i<csize;i++) { 2505 idxs_c[idxs_perm_c[i]] = i; 2506 } 2507 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2508 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 } else { 2511 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2512 } 2513 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2514 2515 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2516 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2517 work_mat[0] = new_mat; 2518 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2519 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2520 } 2521 2522 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2523 *B = work_mat[0]; 2524 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2525 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2526 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2532 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2533 { 2534 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 Mat new_mat; 2537 IS is_local,is_global; 2538 PetscInt local_size; 2539 PetscBool isseqaij; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2544 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2545 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2546 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2547 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2548 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2549 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2550 2551 /* check */ 2552 if (pcbddc->dbg_flag) { 2553 Vec x,x_change; 2554 PetscReal error; 2555 2556 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2557 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2558 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2561 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2562 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2563 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2564 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2565 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2568 ierr = VecDestroy(&x);CHKERRQ(ierr); 2569 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2570 } 2571 2572 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2573 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2574 if (isseqaij) { 2575 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2576 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2577 } else { 2578 Mat work_mat; 2579 2580 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2581 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2582 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2583 } 2584 if (matis->A->symmetric_set) { 2585 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2586 #if !defined(PETSC_USE_COMPLEX) 2587 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2588 #endif 2589 } 2590 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2596 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2597 { 2598 PC_IS* pcis = (PC_IS*)(pc->data); 2599 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2600 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2601 PetscInt *idx_R_local=NULL; 2602 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2603 PetscInt vbs,bs; 2604 PetscBT bitmask=NULL; 2605 PetscErrorCode ierr; 2606 2607 PetscFunctionBegin; 2608 /* 2609 No need to setup local scatters if 2610 - primal space is unchanged 2611 AND 2612 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2613 AND 2614 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2615 */ 2616 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2617 PetscFunctionReturn(0); 2618 } 2619 /* destroy old objects */ 2620 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2621 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2622 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2623 /* Set Non-overlapping dimensions */ 2624 n_B = pcis->n_B; 2625 n_D = pcis->n - n_B; 2626 n_vertices = pcbddc->n_vertices; 2627 2628 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2629 2630 /* create auxiliary bitmask and allocate workspace */ 2631 if (!sub_schurs->reuse_solver) { 2632 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2633 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2634 for (i=0;i<n_vertices;i++) { 2635 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2636 } 2637 2638 for (i=0, n_R=0; i<pcis->n; i++) { 2639 if (!PetscBTLookup(bitmask,i)) { 2640 idx_R_local[n_R++] = i; 2641 } 2642 } 2643 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2644 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2645 2646 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2647 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2648 } 2649 2650 /* Block code */ 2651 vbs = 1; 2652 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2653 if (bs>1 && !(n_vertices%bs)) { 2654 PetscBool is_blocked = PETSC_TRUE; 2655 PetscInt *vary; 2656 if (!sub_schurs->reuse_solver) { 2657 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2658 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2659 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2660 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2661 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2662 for (i=0; i<pcis->n/bs; i++) { 2663 if (vary[i]!=0 && vary[i]!=bs) { 2664 is_blocked = PETSC_FALSE; 2665 break; 2666 } 2667 } 2668 ierr = PetscFree(vary);CHKERRQ(ierr); 2669 } else { 2670 /* Verify directly the R set */ 2671 for (i=0; i<n_R/bs; i++) { 2672 PetscInt j,node=idx_R_local[bs*i]; 2673 for (j=1; j<bs; j++) { 2674 if (node != idx_R_local[bs*i+j]-j) { 2675 is_blocked = PETSC_FALSE; 2676 break; 2677 } 2678 } 2679 } 2680 } 2681 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2682 vbs = bs; 2683 for (i=0;i<n_R/vbs;i++) { 2684 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2685 } 2686 } 2687 } 2688 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2689 if (sub_schurs->reuse_solver) { 2690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2691 2692 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2693 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2694 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2695 reuse_solver->is_R = pcbddc->is_R_local; 2696 } else { 2697 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2698 } 2699 2700 /* print some info if requested */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2709 } 2710 2711 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2712 if (!sub_schurs->reuse_solver) { 2713 IS is_aux1,is_aux2; 2714 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2715 2716 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2717 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2719 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2720 for (i=0; i<n_D; i++) { 2721 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2722 } 2723 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2724 for (i=0, j=0; i<n_R; i++) { 2725 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2726 aux_array1[j++] = i; 2727 } 2728 } 2729 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2730 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2731 for (i=0, j=0; i<n_B; i++) { 2732 if (!PetscBTLookup(bitmask,is_indices[i])) { 2733 aux_array2[j++] = i; 2734 } 2735 } 2736 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2738 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2739 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2740 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2741 2742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2743 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2744 for (i=0, j=0; i<n_R; i++) { 2745 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2746 aux_array1[j++] = i; 2747 } 2748 } 2749 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2750 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2752 } 2753 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2754 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2755 } else { 2756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2757 IS tis; 2758 PetscInt schur_size; 2759 2760 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2761 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2762 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2764 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2765 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2766 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2767 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2768 } 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2776 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2777 { 2778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2779 PC_IS *pcis = (PC_IS*)pc->data; 2780 PC pc_temp; 2781 Mat A_RR; 2782 MatReuse reuse; 2783 PetscScalar m_one = -1.0; 2784 PetscReal value; 2785 PetscInt n_D,n_R; 2786 PetscBool use_exact,use_exact_reduced,issbaij; 2787 PetscErrorCode ierr; 2788 /* prefixes stuff */ 2789 char dir_prefix[256],neu_prefix[256],str_level[16]; 2790 size_t len; 2791 2792 PetscFunctionBegin; 2793 2794 /* compute prefixes */ 2795 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2797 if (!pcbddc->current_level) { 2798 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2799 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2800 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2801 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2802 } else { 2803 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2804 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2805 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2806 len -= 15; /* remove "pc_bddc_coarse_" */ 2807 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2808 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2809 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2810 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2811 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2812 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2813 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2814 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2815 } 2816 2817 /* DIRICHLET PROBLEM */ 2818 if (dirichlet) { 2819 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2820 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2821 if (!sub_schurs->reuse_solver) { 2822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2823 } 2824 if (pcbddc->dbg_flag) { 2825 Mat A_IIn; 2826 2827 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2828 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2829 pcis->A_II = A_IIn; 2830 } 2831 } 2832 if (pcbddc->local_mat->symmetric_set) { 2833 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2834 } 2835 /* Matrix for Dirichlet problem is pcis->A_II */ 2836 n_D = pcis->n - pcis->n_B; 2837 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2838 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2839 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2840 /* default */ 2841 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2842 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2843 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2844 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2845 if (issbaij) { 2846 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2847 } else { 2848 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2849 } 2850 /* Allow user's customization */ 2851 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2852 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2853 } 2854 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2855 if (sub_schurs->reuse_solver) { 2856 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2857 2858 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2859 } 2860 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2861 if (!n_D) { 2862 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2863 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2864 } 2865 /* Set Up KSP for Dirichlet problem of BDDC */ 2866 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2867 /* set ksp_D into pcis data */ 2868 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2869 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2870 pcis->ksp_D = pcbddc->ksp_D; 2871 } 2872 2873 /* NEUMANN PROBLEM */ 2874 A_RR = 0; 2875 if (neumann) { 2876 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2877 PetscInt ibs,mbs; 2878 PetscBool issbaij; 2879 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2880 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2881 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2882 if (pcbddc->ksp_R) { /* already created ksp */ 2883 PetscInt nn_R; 2884 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2885 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2886 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2887 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2888 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2889 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2890 reuse = MAT_INITIAL_MATRIX; 2891 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2892 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2893 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2894 reuse = MAT_INITIAL_MATRIX; 2895 } else { /* safe to reuse the matrix */ 2896 reuse = MAT_REUSE_MATRIX; 2897 } 2898 } 2899 /* last check */ 2900 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2901 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 } else { /* first time, so we need to create the matrix */ 2905 reuse = MAT_INITIAL_MATRIX; 2906 } 2907 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2908 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2909 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2910 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2911 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2912 if (matis->A == pcbddc->local_mat) { 2913 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2914 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2915 } else { 2916 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2917 } 2918 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2919 if (matis->A == pcbddc->local_mat) { 2920 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2921 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2922 } else { 2923 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2924 } 2925 } 2926 /* extract A_RR */ 2927 if (sub_schurs->reuse_solver) { 2928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2929 2930 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2931 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2932 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2933 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2934 } else { 2935 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2936 } 2937 } else { 2938 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2939 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2940 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2941 } 2942 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2943 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2944 } 2945 if (pcbddc->local_mat->symmetric_set) { 2946 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2947 } 2948 if (!pcbddc->ksp_R) { /* create object if not present */ 2949 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2950 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2951 /* default */ 2952 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2953 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2954 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2955 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2956 if (issbaij) { 2957 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2958 } else { 2959 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2960 } 2961 /* Allow user's customization */ 2962 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2963 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2964 } 2965 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2966 if (!n_R) { 2967 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2968 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2969 } 2970 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2971 /* Reuse solver if it is present */ 2972 if (sub_schurs->reuse_solver) { 2973 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2974 2975 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2976 } 2977 /* Set Up KSP for Neumann problem of BDDC */ 2978 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2979 } 2980 2981 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2982 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2987 } 2988 if (dirichlet) { /* Dirichlet */ 2989 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2990 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2991 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2992 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2993 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2994 /* need to be adapted? */ 2995 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2996 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2997 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2998 /* print info */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3005 } 3006 } 3007 if (neumann) { /* Neumann */ 3008 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3009 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3010 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3011 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3012 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3013 /* need to be adapted? */ 3014 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3015 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3016 /* print info */ 3017 if (pcbddc->dbg_flag) { 3018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3019 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3020 } 3021 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3022 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3023 } 3024 } 3025 } 3026 /* free Neumann problem's matrix */ 3027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3033 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3034 { 3035 PetscErrorCode ierr; 3036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3037 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3038 3039 PetscFunctionBegin; 3040 if (!sub_schurs->reuse_solver) { 3041 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3042 } 3043 if (!pcbddc->switch_static) { 3044 if (applytranspose && pcbddc->local_auxmat1) { 3045 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3047 } 3048 if (!sub_schurs->reuse_solver) { 3049 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3050 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3051 } else { 3052 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3053 3054 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 } 3057 } else { 3058 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3060 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3061 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3062 if (applytranspose && pcbddc->local_auxmat1) { 3063 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3064 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3065 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3066 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3067 } 3068 } 3069 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3070 if (applytranspose) { 3071 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3072 } else { 3073 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3074 } 3075 } else { 3076 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3077 3078 if (applytranspose) { 3079 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3080 } else { 3081 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3082 } 3083 } 3084 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3085 if (!pcbddc->switch_static) { 3086 if (!sub_schurs->reuse_solver) { 3087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 } else { 3090 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3091 3092 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3094 } 3095 if (!applytranspose && pcbddc->local_auxmat1) { 3096 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3097 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3098 } 3099 } else { 3100 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 if (!applytranspose && pcbddc->local_auxmat1) { 3105 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3106 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3107 } 3108 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3112 } 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3119 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3120 { 3121 PetscErrorCode ierr; 3122 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3123 PC_IS* pcis = (PC_IS*) (pc->data); 3124 const PetscScalar zero = 0.0; 3125 3126 PetscFunctionBegin; 3127 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3128 if (!pcbddc->benign_apply_coarse_only) { 3129 if (applytranspose) { 3130 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } else { 3133 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3134 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3135 } 3136 } else { 3137 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3138 } 3139 3140 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3141 if (pcbddc->benign_n) { 3142 PetscScalar *array; 3143 PetscInt j; 3144 3145 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3146 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3147 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3148 } 3149 3150 /* start communications from local primal nodes to rhs of coarse solver */ 3151 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3152 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3153 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3154 3155 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3156 /* TODO remove null space when doing multilevel */ 3157 if (pcbddc->coarse_ksp) { 3158 Mat coarse_mat; 3159 Vec rhs,sol; 3160 MatNullSpace nullsp; 3161 PetscBool isbddc = PETSC_FALSE; 3162 3163 if (pcbddc->benign_have_null) { 3164 PC coarse_pc; 3165 3166 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3167 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3168 /* we need to propagate to coarser levels the need for a possible benign correction */ 3169 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3170 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3171 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3172 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3173 } 3174 } 3175 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3176 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3177 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3178 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3179 if (nullsp) { 3180 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3181 } 3182 if (applytranspose) { 3183 if (pcbddc->benign_apply_coarse_only) { /* need just to apply the coarse preconditioner */ 3184 PC coarse_pc; 3185 3186 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3187 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3188 ierr = PCApplyTranspose(coarse_pc,rhs,sol);CHKERRQ(ierr); 3189 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3190 } else { 3191 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3192 } 3193 } else { 3194 if (pcbddc->benign_apply_coarse_only) { /* need just to apply the coarse preconditioner */ 3195 PC coarse_pc; 3196 3197 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3198 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3199 ierr = PCApply(coarse_pc,rhs,sol);CHKERRQ(ierr); 3200 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3201 } else { 3202 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3203 } 3204 } 3205 /* we don't the benign correction at coarser levels anymore */ 3206 if (pcbddc->benign_have_null && isbddc) { 3207 PC coarse_pc; 3208 PC_BDDC* coarsepcbddc; 3209 3210 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3211 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3212 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3213 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3214 } 3215 if (nullsp) { 3216 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3217 } 3218 } 3219 3220 /* Local solution on R nodes */ 3221 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3222 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3223 } 3224 /* communications from coarse sol to local primal nodes */ 3225 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3226 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3227 3228 /* Sum contributions from the two levels */ 3229 if (!pcbddc->benign_apply_coarse_only) { 3230 if (applytranspose) { 3231 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3232 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3233 } else { 3234 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3235 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3236 } 3237 /* store p0 */ 3238 if (pcbddc->benign_n) { 3239 PetscScalar *array; 3240 PetscInt j; 3241 3242 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3243 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3244 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3245 } 3246 } else { /* expand the coarse solution */ 3247 if (applytranspose) { 3248 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3249 } else { 3250 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3251 } 3252 } 3253 PetscFunctionReturn(0); 3254 } 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3258 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3259 { 3260 PetscErrorCode ierr; 3261 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3262 PetscScalar *array; 3263 Vec from,to; 3264 3265 PetscFunctionBegin; 3266 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3267 from = pcbddc->coarse_vec; 3268 to = pcbddc->vec1_P; 3269 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3270 Vec tvec; 3271 3272 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3273 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3274 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3275 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3276 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3277 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3278 } 3279 } else { /* from local to global -> put data in coarse right hand side */ 3280 from = pcbddc->vec1_P; 3281 to = pcbddc->coarse_vec; 3282 } 3283 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3289 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3290 { 3291 PetscErrorCode ierr; 3292 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3293 PetscScalar *array; 3294 Vec from,to; 3295 3296 PetscFunctionBegin; 3297 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3298 from = pcbddc->coarse_vec; 3299 to = pcbddc->vec1_P; 3300 } else { /* from local to global -> put data in coarse right hand side */ 3301 from = pcbddc->vec1_P; 3302 to = pcbddc->coarse_vec; 3303 } 3304 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3305 if (smode == SCATTER_FORWARD) { 3306 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3307 Vec tvec; 3308 3309 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3310 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3311 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3312 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3313 } 3314 } else { 3315 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3316 ierr = VecResetArray(from);CHKERRQ(ierr); 3317 } 3318 } 3319 PetscFunctionReturn(0); 3320 } 3321 3322 /* uncomment for testing purposes */ 3323 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3324 #undef __FUNCT__ 3325 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3326 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3327 { 3328 PetscErrorCode ierr; 3329 PC_IS* pcis = (PC_IS*)(pc->data); 3330 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3331 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3332 /* one and zero */ 3333 PetscScalar one=1.0,zero=0.0; 3334 /* space to store constraints and their local indices */ 3335 PetscScalar *constraints_data; 3336 PetscInt *constraints_idxs,*constraints_idxs_B; 3337 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3338 PetscInt *constraints_n; 3339 /* iterators */ 3340 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3341 /* BLAS integers */ 3342 PetscBLASInt lwork,lierr; 3343 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3344 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3345 /* reuse */ 3346 PetscInt olocal_primal_size,olocal_primal_size_cc; 3347 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3348 /* change of basis */ 3349 PetscBool qr_needed; 3350 PetscBT change_basis,qr_needed_idx; 3351 /* auxiliary stuff */ 3352 PetscInt *nnz,*is_indices; 3353 PetscInt ncc; 3354 /* some quantities */ 3355 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3356 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3357 3358 PetscFunctionBegin; 3359 /* Destroy Mat objects computed previously */ 3360 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3361 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3362 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3363 /* save info on constraints from previous setup (if any) */ 3364 olocal_primal_size = pcbddc->local_primal_size; 3365 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3366 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3367 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3368 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3369 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3370 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3371 3372 if (!pcbddc->adaptive_selection) { 3373 IS ISForVertices,*ISForFaces,*ISForEdges; 3374 MatNullSpace nearnullsp; 3375 const Vec *nearnullvecs; 3376 Vec *localnearnullsp; 3377 PetscScalar *array; 3378 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3379 PetscBool nnsp_has_cnst; 3380 /* LAPACK working arrays for SVD or POD */ 3381 PetscBool skip_lapack,boolforchange; 3382 PetscScalar *work; 3383 PetscReal *singular_vals; 3384 #if defined(PETSC_USE_COMPLEX) 3385 PetscReal *rwork; 3386 #endif 3387 #if defined(PETSC_MISSING_LAPACK_GESVD) 3388 PetscScalar *temp_basis,*correlation_mat; 3389 #else 3390 PetscBLASInt dummy_int=1; 3391 PetscScalar dummy_scalar=1.; 3392 #endif 3393 3394 /* Get index sets for faces, edges and vertices from graph */ 3395 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3396 /* print some info */ 3397 if (pcbddc->dbg_flag) { 3398 PetscInt nv; 3399 3400 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3401 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3402 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3408 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3409 } 3410 3411 /* free unneeded index sets */ 3412 if (!pcbddc->use_vertices) { 3413 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3414 } 3415 if (!pcbddc->use_edges) { 3416 for (i=0;i<n_ISForEdges;i++) { 3417 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3418 } 3419 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3420 n_ISForEdges = 0; 3421 } 3422 if (!pcbddc->use_faces) { 3423 for (i=0;i<n_ISForFaces;i++) { 3424 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3425 } 3426 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3427 n_ISForFaces = 0; 3428 } 3429 3430 #if defined(PETSC_USE_DEBUG) 3431 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3432 Also use_change_of_basis should be consistent among processors */ 3433 if (pcbddc->NullSpace) { 3434 PetscBool tbool[2],gbool[2]; 3435 3436 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3437 pcbddc->use_change_of_basis = PETSC_TRUE; 3438 if (!ISForEdges) { 3439 pcbddc->use_change_on_faces = PETSC_TRUE; 3440 } 3441 } 3442 tbool[0] = pcbddc->use_change_of_basis; 3443 tbool[1] = pcbddc->use_change_on_faces; 3444 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3445 pcbddc->use_change_of_basis = gbool[0]; 3446 pcbddc->use_change_on_faces = gbool[1]; 3447 } 3448 #endif 3449 3450 /* check if near null space is attached to global mat */ 3451 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3452 if (nearnullsp) { 3453 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3454 /* remove any stored info */ 3455 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3456 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3457 /* store information for BDDC solver reuse */ 3458 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3459 pcbddc->onearnullspace = nearnullsp; 3460 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3461 for (i=0;i<nnsp_size;i++) { 3462 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3463 } 3464 } else { /* if near null space is not provided BDDC uses constants by default */ 3465 nnsp_size = 0; 3466 nnsp_has_cnst = PETSC_TRUE; 3467 } 3468 /* get max number of constraints on a single cc */ 3469 max_constraints = nnsp_size; 3470 if (nnsp_has_cnst) max_constraints++; 3471 3472 /* 3473 Evaluate maximum storage size needed by the procedure 3474 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3475 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3476 There can be multiple constraints per connected component 3477 */ 3478 n_vertices = 0; 3479 if (ISForVertices) { 3480 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3481 } 3482 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3483 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3484 3485 total_counts = n_ISForFaces+n_ISForEdges; 3486 total_counts *= max_constraints; 3487 total_counts += n_vertices; 3488 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3489 3490 total_counts = 0; 3491 max_size_of_constraint = 0; 3492 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3493 IS used_is; 3494 if (i<n_ISForEdges) { 3495 used_is = ISForEdges[i]; 3496 } else { 3497 used_is = ISForFaces[i-n_ISForEdges]; 3498 } 3499 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3500 total_counts += j; 3501 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3502 } 3503 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3504 3505 /* get local part of global near null space vectors */ 3506 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3507 for (k=0;k<nnsp_size;k++) { 3508 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3509 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3510 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3511 } 3512 3513 /* whether or not to skip lapack calls */ 3514 skip_lapack = PETSC_TRUE; 3515 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3516 3517 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3518 if (!skip_lapack) { 3519 PetscScalar temp_work; 3520 3521 #if defined(PETSC_MISSING_LAPACK_GESVD) 3522 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3523 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3524 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3525 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3526 #if defined(PETSC_USE_COMPLEX) 3527 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3528 #endif 3529 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3530 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3531 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3532 lwork = -1; 3533 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3534 #if !defined(PETSC_USE_COMPLEX) 3535 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3536 #else 3537 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3538 #endif 3539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3540 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3541 #else /* on missing GESVD */ 3542 /* SVD */ 3543 PetscInt max_n,min_n; 3544 max_n = max_size_of_constraint; 3545 min_n = max_constraints; 3546 if (max_size_of_constraint < max_constraints) { 3547 min_n = max_size_of_constraint; 3548 max_n = max_constraints; 3549 } 3550 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3551 #if defined(PETSC_USE_COMPLEX) 3552 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3553 #endif 3554 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3555 lwork = -1; 3556 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3557 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3558 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3559 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3560 #if !defined(PETSC_USE_COMPLEX) 3561 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3562 #else 3563 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3564 #endif 3565 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3566 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3567 #endif /* on missing GESVD */ 3568 /* Allocate optimal workspace */ 3569 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3570 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3571 } 3572 /* Now we can loop on constraining sets */ 3573 total_counts = 0; 3574 constraints_idxs_ptr[0] = 0; 3575 constraints_data_ptr[0] = 0; 3576 /* vertices */ 3577 if (n_vertices) { 3578 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3579 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3580 for (i=0;i<n_vertices;i++) { 3581 constraints_n[total_counts] = 1; 3582 constraints_data[total_counts] = 1.0; 3583 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3584 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3585 total_counts++; 3586 } 3587 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3588 n_vertices = total_counts; 3589 } 3590 3591 /* edges and faces */ 3592 total_counts_cc = total_counts; 3593 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3594 IS used_is; 3595 PetscBool idxs_copied = PETSC_FALSE; 3596 3597 if (ncc<n_ISForEdges) { 3598 used_is = ISForEdges[ncc]; 3599 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3600 } else { 3601 used_is = ISForFaces[ncc-n_ISForEdges]; 3602 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3603 } 3604 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3605 3606 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3607 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3608 /* change of basis should not be performed on local periodic nodes */ 3609 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3610 if (nnsp_has_cnst) { 3611 PetscScalar quad_value; 3612 3613 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3614 idxs_copied = PETSC_TRUE; 3615 3616 if (!pcbddc->use_nnsp_true) { 3617 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3618 } else { 3619 quad_value = 1.0; 3620 } 3621 for (j=0;j<size_of_constraint;j++) { 3622 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3623 } 3624 temp_constraints++; 3625 total_counts++; 3626 } 3627 for (k=0;k<nnsp_size;k++) { 3628 PetscReal real_value; 3629 PetscScalar *ptr_to_data; 3630 3631 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3632 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3633 for (j=0;j<size_of_constraint;j++) { 3634 ptr_to_data[j] = array[is_indices[j]]; 3635 } 3636 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3637 /* check if array is null on the connected component */ 3638 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3639 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3640 if (real_value > 0.0) { /* keep indices and values */ 3641 temp_constraints++; 3642 total_counts++; 3643 if (!idxs_copied) { 3644 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3645 idxs_copied = PETSC_TRUE; 3646 } 3647 } 3648 } 3649 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3650 valid_constraints = temp_constraints; 3651 if (!pcbddc->use_nnsp_true && temp_constraints) { 3652 if (temp_constraints == 1) { /* just normalize the constraint */ 3653 PetscScalar norm,*ptr_to_data; 3654 3655 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3657 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3658 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3659 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3660 } else { /* perform SVD */ 3661 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3662 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3663 3664 #if defined(PETSC_MISSING_LAPACK_GESVD) 3665 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3666 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3667 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3668 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3669 from that computed using LAPACKgesvd 3670 -> This is due to a different computation of eigenvectors in LAPACKheev 3671 -> The quality of the POD-computed basis will be the same */ 3672 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3673 /* Store upper triangular part of correlation matrix */ 3674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3675 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3676 for (j=0;j<temp_constraints;j++) { 3677 for (k=0;k<j+1;k++) { 3678 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3679 } 3680 } 3681 /* compute eigenvalues and eigenvectors of correlation matrix */ 3682 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3683 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3684 #if !defined(PETSC_USE_COMPLEX) 3685 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3686 #else 3687 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3688 #endif 3689 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3690 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3691 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3692 j = 0; 3693 while (j < temp_constraints && singular_vals[j] < tol) j++; 3694 total_counts = total_counts-j; 3695 valid_constraints = temp_constraints-j; 3696 /* scale and copy POD basis into used quadrature memory */ 3697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3698 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3699 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3701 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3703 if (j<temp_constraints) { 3704 PetscInt ii; 3705 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3707 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3708 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3709 for (k=0;k<temp_constraints-j;k++) { 3710 for (ii=0;ii<size_of_constraint;ii++) { 3711 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3712 } 3713 } 3714 } 3715 #else /* on missing GESVD */ 3716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3717 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3720 #if !defined(PETSC_USE_COMPLEX) 3721 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3722 #else 3723 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3724 #endif 3725 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3726 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3727 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3728 k = temp_constraints; 3729 if (k > size_of_constraint) k = size_of_constraint; 3730 j = 0; 3731 while (j < k && singular_vals[k-j-1] < tol) j++; 3732 valid_constraints = k-j; 3733 total_counts = total_counts-temp_constraints+valid_constraints; 3734 #endif /* on missing GESVD */ 3735 } 3736 } 3737 /* update pointers information */ 3738 if (valid_constraints) { 3739 constraints_n[total_counts_cc] = valid_constraints; 3740 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3741 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3742 /* set change_of_basis flag */ 3743 if (boolforchange) { 3744 PetscBTSet(change_basis,total_counts_cc); 3745 } 3746 total_counts_cc++; 3747 } 3748 } 3749 /* free workspace */ 3750 if (!skip_lapack) { 3751 ierr = PetscFree(work);CHKERRQ(ierr); 3752 #if defined(PETSC_USE_COMPLEX) 3753 ierr = PetscFree(rwork);CHKERRQ(ierr); 3754 #endif 3755 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3756 #if defined(PETSC_MISSING_LAPACK_GESVD) 3757 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3758 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3759 #endif 3760 } 3761 for (k=0;k<nnsp_size;k++) { 3762 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3763 } 3764 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3765 /* free index sets of faces, edges and vertices */ 3766 for (i=0;i<n_ISForFaces;i++) { 3767 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3768 } 3769 if (n_ISForFaces) { 3770 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3771 } 3772 for (i=0;i<n_ISForEdges;i++) { 3773 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3774 } 3775 if (n_ISForEdges) { 3776 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3777 } 3778 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3779 } else { 3780 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3781 3782 total_counts = 0; 3783 n_vertices = 0; 3784 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3785 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3786 } 3787 max_constraints = 0; 3788 total_counts_cc = 0; 3789 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3790 total_counts += pcbddc->adaptive_constraints_n[i]; 3791 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3792 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3793 } 3794 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3795 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3796 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3797 constraints_data = pcbddc->adaptive_constraints_data; 3798 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3799 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3800 total_counts_cc = 0; 3801 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3802 if (pcbddc->adaptive_constraints_n[i]) { 3803 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3804 } 3805 } 3806 #if 0 3807 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3808 for (i=0;i<total_counts_cc;i++) { 3809 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3810 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3811 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3812 printf(" %d",constraints_idxs[j]); 3813 } 3814 printf("\n"); 3815 printf("number of cc: %d\n",constraints_n[i]); 3816 } 3817 for (i=0;i<n_vertices;i++) { 3818 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3819 } 3820 for (i=0;i<sub_schurs->n_subs;i++) { 3821 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3822 } 3823 #endif 3824 3825 max_size_of_constraint = 0; 3826 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3827 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3828 /* Change of basis */ 3829 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3830 if (pcbddc->use_change_of_basis) { 3831 for (i=0;i<sub_schurs->n_subs;i++) { 3832 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3833 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3834 } 3835 } 3836 } 3837 } 3838 pcbddc->local_primal_size = total_counts; 3839 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3840 3841 /* map constraints_idxs in boundary numbering */ 3842 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3843 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3844 3845 /* Create constraint matrix */ 3846 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3847 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3848 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3849 3850 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3851 /* determine if a QR strategy is needed for change of basis */ 3852 qr_needed = PETSC_FALSE; 3853 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3854 total_primal_vertices=0; 3855 pcbddc->local_primal_size_cc = 0; 3856 for (i=0;i<total_counts_cc;i++) { 3857 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3858 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3859 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3860 pcbddc->local_primal_size_cc += 1; 3861 } else if (PetscBTLookup(change_basis,i)) { 3862 for (k=0;k<constraints_n[i];k++) { 3863 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3864 } 3865 pcbddc->local_primal_size_cc += constraints_n[i]; 3866 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3867 PetscBTSet(qr_needed_idx,i); 3868 qr_needed = PETSC_TRUE; 3869 } 3870 } else { 3871 pcbddc->local_primal_size_cc += 1; 3872 } 3873 } 3874 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3875 pcbddc->n_vertices = total_primal_vertices; 3876 /* permute indices in order to have a sorted set of vertices */ 3877 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3878 3879 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3880 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3881 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3882 3883 /* nonzero structure of constraint matrix */ 3884 /* and get reference dof for local constraints */ 3885 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3886 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3887 3888 j = total_primal_vertices; 3889 total_counts = total_primal_vertices; 3890 cum = total_primal_vertices; 3891 for (i=n_vertices;i<total_counts_cc;i++) { 3892 if (!PetscBTLookup(change_basis,i)) { 3893 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3894 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3895 cum++; 3896 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3897 for (k=0;k<constraints_n[i];k++) { 3898 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3899 nnz[j+k] = size_of_constraint; 3900 } 3901 j += constraints_n[i]; 3902 } 3903 } 3904 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3905 ierr = PetscFree(nnz);CHKERRQ(ierr); 3906 3907 /* set values in constraint matrix */ 3908 for (i=0;i<total_primal_vertices;i++) { 3909 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3910 } 3911 total_counts = total_primal_vertices; 3912 for (i=n_vertices;i<total_counts_cc;i++) { 3913 if (!PetscBTLookup(change_basis,i)) { 3914 PetscInt *cols; 3915 3916 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3917 cols = constraints_idxs+constraints_idxs_ptr[i]; 3918 for (k=0;k<constraints_n[i];k++) { 3919 PetscInt row = total_counts+k; 3920 PetscScalar *vals; 3921 3922 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3923 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3924 } 3925 total_counts += constraints_n[i]; 3926 } 3927 } 3928 /* assembling */ 3929 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 /* 3933 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3934 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3935 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3936 */ 3937 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3938 if (pcbddc->use_change_of_basis) { 3939 /* dual and primal dofs on a single cc */ 3940 PetscInt dual_dofs,primal_dofs; 3941 /* working stuff for GEQRF */ 3942 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3943 PetscBLASInt lqr_work; 3944 /* working stuff for UNGQR */ 3945 PetscScalar *gqr_work,lgqr_work_t; 3946 PetscBLASInt lgqr_work; 3947 /* working stuff for TRTRS */ 3948 PetscScalar *trs_rhs; 3949 PetscBLASInt Blas_NRHS; 3950 /* pointers for values insertion into change of basis matrix */ 3951 PetscInt *start_rows,*start_cols; 3952 PetscScalar *start_vals; 3953 /* working stuff for values insertion */ 3954 PetscBT is_primal; 3955 PetscInt *aux_primal_numbering_B; 3956 /* matrix sizes */ 3957 PetscInt global_size,local_size; 3958 /* temporary change of basis */ 3959 Mat localChangeOfBasisMatrix; 3960 /* extra space for debugging */ 3961 PetscScalar *dbg_work; 3962 3963 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3964 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3965 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3966 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3967 /* nonzeros for local mat */ 3968 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3969 if (!pcbddc->benign_change || pcbddc->fake_change) { 3970 for (i=0;i<pcis->n;i++) nnz[i]=1; 3971 } else { 3972 const PetscInt *ii; 3973 PetscInt n; 3974 PetscBool flg_row; 3975 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3976 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3977 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3978 } 3979 for (i=n_vertices;i<total_counts_cc;i++) { 3980 if (PetscBTLookup(change_basis,i)) { 3981 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3982 if (PetscBTLookup(qr_needed_idx,i)) { 3983 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3984 } else { 3985 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3986 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3987 } 3988 } 3989 } 3990 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3991 ierr = PetscFree(nnz);CHKERRQ(ierr); 3992 /* Set interior change in the matrix */ 3993 if (!pcbddc->benign_change || pcbddc->fake_change) { 3994 for (i=0;i<pcis->n;i++) { 3995 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3996 } 3997 } else { 3998 const PetscInt *ii,*jj; 3999 PetscScalar *aa; 4000 PetscInt n; 4001 PetscBool flg_row; 4002 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4003 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4004 for (i=0;i<n;i++) { 4005 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4006 } 4007 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4008 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4009 } 4010 4011 if (pcbddc->dbg_flag) { 4012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4013 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4014 } 4015 4016 4017 /* Now we loop on the constraints which need a change of basis */ 4018 /* 4019 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4020 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4021 4022 Basic blocks of change of basis matrix T computed by 4023 4024 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4025 4026 | 1 0 ... 0 s_1/S | 4027 | 0 1 ... 0 s_2/S | 4028 | ... | 4029 | 0 ... 1 s_{n-1}/S | 4030 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4031 4032 with S = \sum_{i=1}^n s_i^2 4033 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4034 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4035 4036 - QR decomposition of constraints otherwise 4037 */ 4038 if (qr_needed) { 4039 /* space to store Q */ 4040 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4041 /* first we issue queries for optimal work */ 4042 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4043 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4044 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4045 lqr_work = -1; 4046 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4047 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4048 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4049 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4050 lgqr_work = -1; 4051 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4052 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4053 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4054 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4055 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4056 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4057 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4058 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4059 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4060 /* array to store scaling factors for reflectors */ 4061 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4062 /* array to store rhs and solution of triangular solver */ 4063 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4064 /* allocating workspace for check */ 4065 if (pcbddc->dbg_flag) { 4066 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4067 } 4068 } 4069 /* array to store whether a node is primal or not */ 4070 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4071 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4072 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4073 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4074 for (i=0;i<total_primal_vertices;i++) { 4075 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4076 } 4077 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4078 4079 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4080 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4081 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4082 if (PetscBTLookup(change_basis,total_counts)) { 4083 /* get constraint info */ 4084 primal_dofs = constraints_n[total_counts]; 4085 dual_dofs = size_of_constraint-primal_dofs; 4086 4087 if (pcbddc->dbg_flag) { 4088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4089 } 4090 4091 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4092 4093 /* copy quadrature constraints for change of basis check */ 4094 if (pcbddc->dbg_flag) { 4095 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4096 } 4097 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4098 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4099 4100 /* compute QR decomposition of constraints */ 4101 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4102 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4104 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4105 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4106 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4107 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4108 4109 /* explictly compute R^-T */ 4110 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4111 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4112 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4113 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4115 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4117 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4118 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4119 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4120 4121 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4124 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4127 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4130 4131 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4132 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4133 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4134 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4135 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4136 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4138 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4139 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4140 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4141 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4142 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4143 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4144 4145 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4146 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4147 /* insert cols for primal dofs */ 4148 for (j=0;j<primal_dofs;j++) { 4149 start_vals = &qr_basis[j*size_of_constraint]; 4150 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4151 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4152 } 4153 /* insert cols for dual dofs */ 4154 for (j=0,k=0;j<dual_dofs;k++) { 4155 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4156 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4157 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4158 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4159 j++; 4160 } 4161 } 4162 4163 /* check change of basis */ 4164 if (pcbddc->dbg_flag) { 4165 PetscInt ii,jj; 4166 PetscBool valid_qr=PETSC_TRUE; 4167 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4171 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4172 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4173 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4174 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4175 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4176 for (jj=0;jj<size_of_constraint;jj++) { 4177 for (ii=0;ii<primal_dofs;ii++) { 4178 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4179 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4180 } 4181 } 4182 if (!valid_qr) { 4183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4184 for (jj=0;jj<size_of_constraint;jj++) { 4185 for (ii=0;ii<primal_dofs;ii++) { 4186 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4187 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4188 } 4189 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4190 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4191 } 4192 } 4193 } 4194 } else { 4195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4196 } 4197 } 4198 } else { /* simple transformation block */ 4199 PetscInt row,col; 4200 PetscScalar val,norm; 4201 4202 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4203 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4204 for (j=0;j<size_of_constraint;j++) { 4205 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4206 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4207 if (!PetscBTLookup(is_primal,row_B)) { 4208 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4209 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4210 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4211 } else { 4212 for (k=0;k<size_of_constraint;k++) { 4213 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4214 if (row != col) { 4215 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4216 } else { 4217 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4218 } 4219 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4220 } 4221 } 4222 } 4223 if (pcbddc->dbg_flag) { 4224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4225 } 4226 } 4227 } else { 4228 if (pcbddc->dbg_flag) { 4229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4230 } 4231 } 4232 } 4233 4234 /* free workspace */ 4235 if (qr_needed) { 4236 if (pcbddc->dbg_flag) { 4237 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4238 } 4239 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4240 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4241 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4242 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4243 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4244 } 4245 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4246 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4247 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4248 4249 /* assembling of global change of variable */ 4250 if (!pcbddc->fake_change) { 4251 Mat tmat; 4252 PetscInt bs; 4253 4254 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4255 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4256 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4257 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4258 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4259 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4260 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4261 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4262 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4263 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4264 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4265 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4266 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4267 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4268 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4269 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4270 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4271 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4272 4273 /* check */ 4274 if (pcbddc->dbg_flag) { 4275 PetscReal error; 4276 Vec x,x_change; 4277 4278 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4279 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4280 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4281 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4282 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4283 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4284 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4285 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4286 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4287 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4288 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4289 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4292 ierr = VecDestroy(&x);CHKERRQ(ierr); 4293 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4294 } 4295 /* adapt sub_schurs computed (if any) */ 4296 if (pcbddc->use_deluxe_scaling) { 4297 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4298 4299 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4300 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4301 } 4302 if (sub_schurs->S_Ej_all) { 4303 Mat S_new,tmat; 4304 IS is_all_N,is_V_Sall = NULL; 4305 4306 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4307 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4308 if (pcbddc->deluxe_zerorows) { 4309 ISLocalToGlobalMapping NtoSall; 4310 IS is_V; 4311 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4312 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4313 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4314 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4315 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4316 } 4317 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4318 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4319 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4320 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4321 if (pcbddc->deluxe_zerorows) { 4322 const PetscScalar *array; 4323 const PetscInt *idxs_V,*idxs_all; 4324 PetscInt i,n_V; 4325 4326 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4327 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4328 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4329 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4330 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4331 for (i=0;i<n_V;i++) { 4332 PetscScalar val; 4333 PetscInt idx; 4334 4335 idx = idxs_V[i]; 4336 val = array[idxs_all[idxs_V[i]]]; 4337 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4338 } 4339 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4340 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4341 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4342 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4343 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4344 } 4345 sub_schurs->S_Ej_all = S_new; 4346 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4347 if (sub_schurs->sum_S_Ej_all) { 4348 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4349 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4350 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4351 if (pcbddc->deluxe_zerorows) { 4352 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4353 } 4354 sub_schurs->sum_S_Ej_all = S_new; 4355 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4356 } 4357 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4358 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4359 } 4360 /* destroy any change of basis context in sub_schurs */ 4361 if (sub_schurs->change) { 4362 PetscInt i; 4363 4364 for (i=0;i<sub_schurs->n_subs;i++) { 4365 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4366 } 4367 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4368 } 4369 } 4370 if (pcbddc->switch_static) { /* need to save the local change */ 4371 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4372 } else { 4373 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4374 } 4375 /* determine if any process has changed the pressures locally */ 4376 pcbddc->change_interior = pcbddc->benign_have_null; 4377 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4379 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4380 pcbddc->use_qr_single = qr_needed; 4381 } 4382 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4383 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4384 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4385 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4386 } else { 4387 Mat benign_global = NULL; 4388 if (pcbddc->benign_have_null) { 4389 Mat tmat; 4390 4391 pcbddc->change_interior = PETSC_TRUE; 4392 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4393 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4394 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4395 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4396 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4397 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4398 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4399 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4400 if (pcbddc->benign_change) { 4401 Mat M; 4402 4403 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4404 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4405 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4406 ierr = MatDestroy(&M);CHKERRQ(ierr); 4407 } else { 4408 Mat eye; 4409 PetscScalar *array; 4410 4411 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4412 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4413 for (i=0;i<pcis->n;i++) { 4414 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4415 } 4416 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4417 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4418 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4419 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4420 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4421 } 4422 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4423 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4424 } 4425 if (pcbddc->user_ChangeOfBasisMatrix) { 4426 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4427 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4428 } else if (pcbddc->benign_have_null) { 4429 pcbddc->ChangeOfBasisMatrix = benign_global; 4430 } 4431 } 4432 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4433 IS is_global; 4434 const PetscInt *gidxs; 4435 4436 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4437 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4438 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4439 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4440 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4441 } 4442 } 4443 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4444 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4445 } 4446 4447 if (!pcbddc->fake_change) { 4448 /* add pressure dofs to set of primal nodes for numbering purposes */ 4449 for (i=0;i<pcbddc->benign_n;i++) { 4450 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4451 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4452 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4453 pcbddc->local_primal_size_cc++; 4454 pcbddc->local_primal_size++; 4455 } 4456 4457 /* check if a new primal space has been introduced (also take into account benign trick) */ 4458 pcbddc->new_primal_space_local = PETSC_TRUE; 4459 if (olocal_primal_size == pcbddc->local_primal_size) { 4460 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4461 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4462 if (!pcbddc->new_primal_space_local) { 4463 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4464 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4465 } 4466 } 4467 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4468 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4469 } 4470 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4471 4472 /* flush dbg viewer */ 4473 if (pcbddc->dbg_flag) { 4474 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4475 } 4476 4477 /* free workspace */ 4478 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4479 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4480 if (!pcbddc->adaptive_selection) { 4481 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4482 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4483 } else { 4484 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4485 pcbddc->adaptive_constraints_idxs_ptr, 4486 pcbddc->adaptive_constraints_data_ptr, 4487 pcbddc->adaptive_constraints_idxs, 4488 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4489 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4490 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4491 } 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4497 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4498 { 4499 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4500 PC_IS *pcis = (PC_IS*)pc->data; 4501 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4502 PetscInt ierr,i,N; 4503 4504 PetscFunctionBegin; 4505 /* Reset previously computed graph */ 4506 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4507 /* Init local Graph struct */ 4508 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4509 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4510 4511 /* Check validity of the csr graph passed in by the user */ 4512 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4513 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4514 } 4515 4516 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4517 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4518 PetscInt *xadj,*adjncy; 4519 PetscInt nvtxs; 4520 PetscBool flg_row=PETSC_FALSE; 4521 4522 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4523 if (flg_row) { 4524 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4525 pcbddc->computed_rowadj = PETSC_TRUE; 4526 } 4527 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4528 } 4529 if (pcbddc->dbg_flag) { 4530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4531 } 4532 4533 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4534 if (pcbddc->user_provided_isfordofs) { 4535 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4536 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4537 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4538 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4539 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4540 } 4541 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4542 pcbddc->n_ISForDofs = 0; 4543 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4544 } 4545 } else { 4546 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4547 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4548 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4549 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4550 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4551 } 4552 } 4553 } 4554 4555 /* Setup of Graph */ 4556 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4557 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4558 } 4559 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4560 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4561 } 4562 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4563 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4564 } 4565 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4566 4567 /* attach info on disconnected subdomains if present */ 4568 if (pcbddc->n_local_subs) { 4569 PetscInt *local_subs; 4570 4571 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4572 for (i=0;i<pcbddc->n_local_subs;i++) { 4573 const PetscInt *idxs; 4574 PetscInt nl,j; 4575 4576 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4577 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4578 for (j=0;j<nl;j++) { 4579 local_subs[idxs[j]] = i; 4580 } 4581 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4582 } 4583 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4584 pcbddc->mat_graph->local_subs = local_subs; 4585 } 4586 4587 /* Graph's connected components analysis */ 4588 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4589 PetscFunctionReturn(0); 4590 } 4591 4592 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4593 #undef __FUNCT__ 4594 #define __FUNCT__ "PCBDDCSubsetNumbering" 4595 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4596 { 4597 PetscSF sf; 4598 PetscLayout map; 4599 const PetscInt *idxs; 4600 PetscInt *leaf_data,*root_data,*gidxs; 4601 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4602 PetscInt n_n,nlocals,start,first_index; 4603 PetscMPIInt commsize; 4604 PetscBool first_found; 4605 PetscErrorCode ierr; 4606 4607 PetscFunctionBegin; 4608 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4609 if (subset_mult) { 4610 PetscCheckSameComm(subset,1,subset_mult,2); 4611 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4612 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4613 } 4614 /* create workspace layout for computing global indices of subset */ 4615 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4616 lbounds[0] = lbounds[1] = 0; 4617 for (i=0;i<n;i++) { 4618 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4619 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4620 } 4621 lbounds[0] = -lbounds[0]; 4622 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4623 gbounds[0] = -gbounds[0]; 4624 N = gbounds[1] - gbounds[0] + 1; 4625 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4626 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4627 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4628 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4629 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4630 4631 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4632 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4633 if (subset_mult) { 4634 const PetscInt* idxs_mult; 4635 4636 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4637 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4638 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4639 } else { 4640 for (i=0;i<n;i++) leaf_data[i] = 1; 4641 } 4642 /* local size of new subset */ 4643 n_n = 0; 4644 for (i=0;i<n;i++) n_n += leaf_data[i]; 4645 4646 /* global indexes in layout */ 4647 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4648 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4649 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4651 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4652 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4653 4654 /* reduce from leaves to roots */ 4655 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4656 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4657 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4658 4659 /* count indexes in local part of layout */ 4660 nlocals = 0; 4661 first_index = -1; 4662 first_found = PETSC_FALSE; 4663 for (i=0;i<Nl;i++) { 4664 if (!first_found && root_data[i]) { 4665 first_found = PETSC_TRUE; 4666 first_index = i; 4667 } 4668 nlocals += root_data[i]; 4669 } 4670 4671 /* cumulative of number of indexes and size of subset without holes */ 4672 #if defined(PETSC_HAVE_MPI_EXSCAN) 4673 start = 0; 4674 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4675 #else 4676 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4677 start = start-nlocals; 4678 #endif 4679 4680 if (N_n) { /* compute total size of new subset if requested */ 4681 *N_n = start + nlocals; 4682 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4683 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4684 } 4685 4686 /* adapt root data with cumulative */ 4687 if (first_found) { 4688 PetscInt old_index; 4689 4690 root_data[first_index] += start; 4691 old_index = first_index; 4692 for (i=first_index+1;i<Nl;i++) { 4693 if (root_data[i]) { 4694 root_data[i] += root_data[old_index]; 4695 old_index = i; 4696 } 4697 } 4698 } 4699 4700 /* from roots to leaves */ 4701 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4702 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4703 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4704 4705 /* create new IS with global indexes without holes */ 4706 if (subset_mult) { 4707 const PetscInt* idxs_mult; 4708 PetscInt cum; 4709 4710 cum = 0; 4711 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4712 for (i=0;i<n;i++) { 4713 PetscInt j; 4714 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4715 } 4716 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4717 } else { 4718 for (i=0;i<n;i++) { 4719 gidxs[i] = leaf_data[i]-1; 4720 } 4721 } 4722 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4723 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4724 PetscFunctionReturn(0); 4725 } 4726 4727 #undef __FUNCT__ 4728 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4729 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4730 { 4731 PetscInt i,j; 4732 PetscScalar *alphas; 4733 PetscErrorCode ierr; 4734 4735 PetscFunctionBegin; 4736 /* this implements stabilized Gram-Schmidt */ 4737 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4738 for (i=0;i<n;i++) { 4739 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4740 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4741 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4742 } 4743 ierr = PetscFree(alphas);CHKERRQ(ierr); 4744 PetscFunctionReturn(0); 4745 } 4746 4747 #undef __FUNCT__ 4748 #define __FUNCT__ "MatISGetSubassemblingPattern" 4749 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4750 { 4751 Mat A; 4752 PetscInt n_neighs,*neighs,*n_shared,**shared; 4753 PetscMPIInt size,rank,color; 4754 PetscInt *xadj,*adjncy; 4755 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4756 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4757 PetscInt void_procs,*procs_candidates = NULL; 4758 PetscInt xadj_count, *count; 4759 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4760 PetscSubcomm psubcomm; 4761 MPI_Comm subcomm; 4762 PetscErrorCode ierr; 4763 4764 PetscFunctionBegin; 4765 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4766 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4767 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4768 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4769 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4770 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4771 4772 if (have_void) *have_void = PETSC_FALSE; 4773 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4774 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4775 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4776 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4777 im_active = !!(n); 4778 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4779 void_procs = size - active_procs; 4780 /* get ranks of of non-active processes in mat communicator */ 4781 if (void_procs) { 4782 PetscInt ncand; 4783 4784 if (have_void) *have_void = PETSC_TRUE; 4785 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4786 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4787 for (i=0,ncand=0;i<size;i++) { 4788 if (!procs_candidates[i]) { 4789 procs_candidates[ncand++] = i; 4790 } 4791 } 4792 /* force n_subdomains to be not greater that the number of non-active processes */ 4793 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4794 } 4795 4796 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4797 if (active_procs < *n_subdomains) { 4798 PetscInt issize,isidx; 4799 if (im_active) { 4800 issize = 1; 4801 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4802 isidx = procs_candidates[rank]; 4803 } else { 4804 isidx = rank; 4805 } 4806 } else { 4807 issize = 0; 4808 isidx = -1; 4809 } 4810 *n_subdomains = active_procs; 4811 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4812 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4813 PetscFunctionReturn(0); 4814 } 4815 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4816 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4817 threshold = PetscMax(threshold,2); 4818 4819 /* Get info on mapping */ 4820 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4821 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4822 4823 /* build local CSR graph of subdomains' connectivity */ 4824 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4825 xadj[0] = 0; 4826 xadj[1] = PetscMax(n_neighs-1,0); 4827 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4828 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4829 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4830 for (i=1;i<n_neighs;i++) 4831 for (j=0;j<n_shared[i];j++) 4832 count[shared[i][j]] += 1; 4833 4834 xadj_count = 0; 4835 for (i=1;i<n_neighs;i++) { 4836 for (j=0;j<n_shared[i];j++) { 4837 if (count[shared[i][j]] < threshold) { 4838 adjncy[xadj_count] = neighs[i]; 4839 adjncy_wgt[xadj_count] = n_shared[i]; 4840 xadj_count++; 4841 break; 4842 } 4843 } 4844 } 4845 xadj[1] = xadj_count; 4846 ierr = PetscFree(count);CHKERRQ(ierr); 4847 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4848 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4849 4850 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4851 4852 /* Restrict work on active processes only */ 4853 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4854 if (void_procs) { 4855 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4856 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4857 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4858 subcomm = PetscSubcommChild(psubcomm); 4859 } else { 4860 psubcomm = NULL; 4861 subcomm = PetscObjectComm((PetscObject)mat); 4862 } 4863 4864 v_wgt = NULL; 4865 if (!color) { 4866 ierr = PetscFree(xadj);CHKERRQ(ierr); 4867 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4868 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4869 } else { 4870 Mat subdomain_adj; 4871 IS new_ranks,new_ranks_contig; 4872 MatPartitioning partitioner; 4873 PetscInt rstart=0,rend=0; 4874 PetscInt *is_indices,*oldranks; 4875 PetscMPIInt size; 4876 PetscBool aggregate; 4877 4878 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4879 if (void_procs) { 4880 PetscInt prank = rank; 4881 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4882 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4883 for (i=0;i<xadj[1];i++) { 4884 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4885 } 4886 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4887 } else { 4888 oldranks = NULL; 4889 } 4890 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4891 if (aggregate) { /* TODO: all this part could be made more efficient */ 4892 PetscInt lrows,row,ncols,*cols; 4893 PetscMPIInt nrank; 4894 PetscScalar *vals; 4895 4896 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 4897 lrows = 0; 4898 if (nrank<redprocs) { 4899 lrows = size/redprocs; 4900 if (nrank<size%redprocs) lrows++; 4901 } 4902 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4903 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4904 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4905 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4906 row = nrank; 4907 ncols = xadj[1]-xadj[0]; 4908 cols = adjncy; 4909 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4910 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4911 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4912 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4913 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4914 ierr = PetscFree(xadj);CHKERRQ(ierr); 4915 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4916 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4917 ierr = PetscFree(vals);CHKERRQ(ierr); 4918 if (use_vwgt) { 4919 Vec v; 4920 const PetscScalar *array; 4921 PetscInt nl; 4922 4923 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 4924 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 4925 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4926 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4927 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 4928 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 4929 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 4930 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 4931 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 4932 ierr = VecDestroy(&v);CHKERRQ(ierr); 4933 } 4934 } else { 4935 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4936 if (use_vwgt) { 4937 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4938 v_wgt[0] = local_size; 4939 } 4940 } 4941 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4942 4943 /* Partition */ 4944 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 4945 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4946 if (v_wgt) { 4947 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4948 } 4949 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4950 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4951 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4952 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4953 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4954 4955 /* renumber new_ranks to avoid "holes" in new set of processors */ 4956 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4957 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4958 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4959 if (!aggregate) { 4960 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4961 #if defined(PETSC_USE_DEBUG) 4962 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4963 #endif 4964 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4965 } else if (oldranks) { 4966 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4967 } else { 4968 ranks_send_to_idx[0] = is_indices[0]; 4969 } 4970 } else { 4971 PetscInt idxs[1]; 4972 PetscMPIInt tag; 4973 MPI_Request *reqs; 4974 4975 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4976 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4977 for (i=rstart;i<rend;i++) { 4978 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 4979 } 4980 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 4981 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4982 ierr = PetscFree(reqs);CHKERRQ(ierr); 4983 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4984 #if defined(PETSC_USE_DEBUG) 4985 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4986 #endif 4987 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4988 } else if (oldranks) { 4989 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4990 } else { 4991 ranks_send_to_idx[0] = idxs[0]; 4992 } 4993 } 4994 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4995 /* clean up */ 4996 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4997 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4998 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4999 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5000 } 5001 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5002 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5003 5004 /* assemble parallel IS for sends */ 5005 i = 1; 5006 if (!color) i=0; 5007 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5008 PetscFunctionReturn(0); 5009 } 5010 5011 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5012 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "MatISSubassemble" 5015 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 5016 { 5017 Mat local_mat; 5018 IS is_sends_internal; 5019 PetscInt rows,cols,new_local_rows; 5020 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5021 PetscBool ismatis,isdense,newisdense,destroy_mat; 5022 ISLocalToGlobalMapping l2gmap; 5023 PetscInt* l2gmap_indices; 5024 const PetscInt* is_indices; 5025 MatType new_local_type; 5026 /* buffers */ 5027 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5028 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5029 PetscInt *recv_buffer_idxs_local; 5030 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5031 /* MPI */ 5032 MPI_Comm comm,comm_n; 5033 PetscSubcomm subcomm; 5034 PetscMPIInt n_sends,n_recvs,commsize; 5035 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5036 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5037 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5038 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5039 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5040 PetscErrorCode ierr; 5041 5042 PetscFunctionBegin; 5043 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5044 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5045 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5046 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5047 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5048 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5049 PetscValidLogicalCollectiveBool(mat,reuse,6); 5050 PetscValidLogicalCollectiveInt(mat,nis,8); 5051 5052 /* further checks */ 5053 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5054 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5055 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5056 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5057 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5058 if (reuse && *mat_n) { 5059 PetscInt mrows,mcols,mnrows,mncols; 5060 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5061 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5062 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5063 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5064 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5065 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5066 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5067 } 5068 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5069 PetscValidLogicalCollectiveInt(mat,bs,0); 5070 5071 /* prepare IS for sending if not provided */ 5072 if (!is_sends) { 5073 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5074 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5075 } else { 5076 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5077 is_sends_internal = is_sends; 5078 } 5079 5080 /* get comm */ 5081 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5082 5083 /* compute number of sends */ 5084 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5085 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5086 5087 /* compute number of receives */ 5088 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5089 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5090 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5091 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5092 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5093 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5094 ierr = PetscFree(iflags);CHKERRQ(ierr); 5095 5096 /* restrict comm if requested */ 5097 subcomm = 0; 5098 destroy_mat = PETSC_FALSE; 5099 if (restrict_comm) { 5100 PetscMPIInt color,subcommsize; 5101 5102 color = 0; 5103 if (restrict_full) { 5104 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5105 } else { 5106 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5107 } 5108 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5109 subcommsize = commsize - subcommsize; 5110 /* check if reuse has been requested */ 5111 if (reuse) { 5112 if (*mat_n) { 5113 PetscMPIInt subcommsize2; 5114 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5115 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5116 comm_n = PetscObjectComm((PetscObject)*mat_n); 5117 } else { 5118 comm_n = PETSC_COMM_SELF; 5119 } 5120 } else { /* MAT_INITIAL_MATRIX */ 5121 PetscMPIInt rank; 5122 5123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5124 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5125 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5126 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5127 comm_n = PetscSubcommChild(subcomm); 5128 } 5129 /* flag to destroy *mat_n if not significative */ 5130 if (color) destroy_mat = PETSC_TRUE; 5131 } else { 5132 comm_n = comm; 5133 } 5134 5135 /* prepare send/receive buffers */ 5136 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5137 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5138 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5139 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5140 if (nis) { 5141 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5142 } 5143 5144 /* Get data from local matrices */ 5145 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5146 /* TODO: See below some guidelines on how to prepare the local buffers */ 5147 /* 5148 send_buffer_vals should contain the raw values of the local matrix 5149 send_buffer_idxs should contain: 5150 - MatType_PRIVATE type 5151 - PetscInt size_of_l2gmap 5152 - PetscInt global_row_indices[size_of_l2gmap] 5153 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5154 */ 5155 else { 5156 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5157 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5158 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5159 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5160 send_buffer_idxs[1] = i; 5161 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5162 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5163 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5164 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5165 for (i=0;i<n_sends;i++) { 5166 ilengths_vals[is_indices[i]] = len*len; 5167 ilengths_idxs[is_indices[i]] = len+2; 5168 } 5169 } 5170 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5171 /* additional is (if any) */ 5172 if (nis) { 5173 PetscMPIInt psum; 5174 PetscInt j; 5175 for (j=0,psum=0;j<nis;j++) { 5176 PetscInt plen; 5177 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5178 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5179 psum += len+1; /* indices + lenght */ 5180 } 5181 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5182 for (j=0,psum=0;j<nis;j++) { 5183 PetscInt plen; 5184 const PetscInt *is_array_idxs; 5185 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5186 send_buffer_idxs_is[psum] = plen; 5187 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5188 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5189 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5190 psum += plen+1; /* indices + lenght */ 5191 } 5192 for (i=0;i<n_sends;i++) { 5193 ilengths_idxs_is[is_indices[i]] = psum; 5194 } 5195 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5196 } 5197 5198 buf_size_idxs = 0; 5199 buf_size_vals = 0; 5200 buf_size_idxs_is = 0; 5201 for (i=0;i<n_recvs;i++) { 5202 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5203 buf_size_vals += (PetscInt)olengths_vals[i]; 5204 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5205 } 5206 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5207 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5208 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5209 5210 /* get new tags for clean communications */ 5211 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5212 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5213 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5214 5215 /* allocate for requests */ 5216 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5218 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5219 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5220 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5221 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5222 5223 /* communications */ 5224 ptr_idxs = recv_buffer_idxs; 5225 ptr_vals = recv_buffer_vals; 5226 ptr_idxs_is = recv_buffer_idxs_is; 5227 for (i=0;i<n_recvs;i++) { 5228 source_dest = onodes[i]; 5229 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5230 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5231 ptr_idxs += olengths_idxs[i]; 5232 ptr_vals += olengths_vals[i]; 5233 if (nis) { 5234 source_dest = onodes_is[i]; 5235 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5236 ptr_idxs_is += olengths_idxs_is[i]; 5237 } 5238 } 5239 for (i=0;i<n_sends;i++) { 5240 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5241 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5242 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5243 if (nis) { 5244 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5245 } 5246 } 5247 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5248 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5249 5250 /* assemble new l2g map */ 5251 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5252 ptr_idxs = recv_buffer_idxs; 5253 new_local_rows = 0; 5254 for (i=0;i<n_recvs;i++) { 5255 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5256 ptr_idxs += olengths_idxs[i]; 5257 } 5258 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5259 ptr_idxs = recv_buffer_idxs; 5260 new_local_rows = 0; 5261 for (i=0;i<n_recvs;i++) { 5262 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5263 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5264 ptr_idxs += olengths_idxs[i]; 5265 } 5266 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5267 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5268 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5269 5270 /* infer new local matrix type from received local matrices type */ 5271 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5272 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5273 if (n_recvs) { 5274 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5275 ptr_idxs = recv_buffer_idxs; 5276 for (i=0;i<n_recvs;i++) { 5277 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5278 new_local_type_private = MATAIJ_PRIVATE; 5279 break; 5280 } 5281 ptr_idxs += olengths_idxs[i]; 5282 } 5283 switch (new_local_type_private) { 5284 case MATDENSE_PRIVATE: 5285 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5286 new_local_type = MATSEQAIJ; 5287 bs = 1; 5288 } else { /* if I receive only 1 dense matrix */ 5289 new_local_type = MATSEQDENSE; 5290 bs = 1; 5291 } 5292 break; 5293 case MATAIJ_PRIVATE: 5294 new_local_type = MATSEQAIJ; 5295 bs = 1; 5296 break; 5297 case MATBAIJ_PRIVATE: 5298 new_local_type = MATSEQBAIJ; 5299 break; 5300 case MATSBAIJ_PRIVATE: 5301 new_local_type = MATSEQSBAIJ; 5302 break; 5303 default: 5304 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5305 break; 5306 } 5307 } else { /* by default, new_local_type is seqdense */ 5308 new_local_type = MATSEQDENSE; 5309 bs = 1; 5310 } 5311 5312 /* create MATIS object if needed */ 5313 if (!reuse) { 5314 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5315 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5316 } else { 5317 /* it also destroys the local matrices */ 5318 if (*mat_n) { 5319 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5320 } else { /* this is a fake object */ 5321 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5322 } 5323 } 5324 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5325 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5326 5327 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5328 5329 /* Global to local map of received indices */ 5330 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5331 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5332 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5333 5334 /* restore attributes -> type of incoming data and its size */ 5335 buf_size_idxs = 0; 5336 for (i=0;i<n_recvs;i++) { 5337 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5338 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5339 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5340 } 5341 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5342 5343 /* set preallocation */ 5344 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5345 if (!newisdense) { 5346 PetscInt *new_local_nnz=0; 5347 5348 ptr_vals = recv_buffer_vals; 5349 ptr_idxs = recv_buffer_idxs_local; 5350 if (n_recvs) { 5351 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5352 } 5353 for (i=0;i<n_recvs;i++) { 5354 PetscInt j; 5355 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5356 for (j=0;j<*(ptr_idxs+1);j++) { 5357 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5358 } 5359 } else { 5360 /* TODO */ 5361 } 5362 ptr_idxs += olengths_idxs[i]; 5363 } 5364 if (new_local_nnz) { 5365 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5366 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5367 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5368 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5369 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5370 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5371 } else { 5372 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5373 } 5374 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5375 } else { 5376 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5377 } 5378 5379 /* set values */ 5380 ptr_vals = recv_buffer_vals; 5381 ptr_idxs = recv_buffer_idxs_local; 5382 for (i=0;i<n_recvs;i++) { 5383 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5384 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5385 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5386 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5387 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5388 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5389 } else { 5390 /* TODO */ 5391 } 5392 ptr_idxs += olengths_idxs[i]; 5393 ptr_vals += olengths_vals[i]; 5394 } 5395 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5396 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5397 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5398 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5399 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5400 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5401 5402 #if 0 5403 if (!restrict_comm) { /* check */ 5404 Vec lvec,rvec; 5405 PetscReal infty_error; 5406 5407 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5408 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5409 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5410 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5411 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5412 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5413 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5414 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5415 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5416 } 5417 #endif 5418 5419 /* assemble new additional is (if any) */ 5420 if (nis) { 5421 PetscInt **temp_idxs,*count_is,j,psum; 5422 5423 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5424 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5425 ptr_idxs = recv_buffer_idxs_is; 5426 psum = 0; 5427 for (i=0;i<n_recvs;i++) { 5428 for (j=0;j<nis;j++) { 5429 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5430 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5431 psum += plen; 5432 ptr_idxs += plen+1; /* shift pointer to received data */ 5433 } 5434 } 5435 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5436 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5437 for (i=1;i<nis;i++) { 5438 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5439 } 5440 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5441 ptr_idxs = recv_buffer_idxs_is; 5442 for (i=0;i<n_recvs;i++) { 5443 for (j=0;j<nis;j++) { 5444 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5445 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5446 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5447 ptr_idxs += plen+1; /* shift pointer to received data */ 5448 } 5449 } 5450 for (i=0;i<nis;i++) { 5451 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5452 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5453 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5454 } 5455 ierr = PetscFree(count_is);CHKERRQ(ierr); 5456 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5457 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5458 } 5459 /* free workspace */ 5460 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5461 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5462 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5463 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5464 if (isdense) { 5465 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5466 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5467 } else { 5468 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5469 } 5470 if (nis) { 5471 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5472 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5473 } 5474 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5475 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5476 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5477 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5478 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5479 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5480 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5481 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5482 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5483 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5484 ierr = PetscFree(onodes);CHKERRQ(ierr); 5485 if (nis) { 5486 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5487 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5488 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5489 } 5490 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5491 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5492 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5493 for (i=0;i<nis;i++) { 5494 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5495 } 5496 *mat_n = NULL; 5497 } 5498 PetscFunctionReturn(0); 5499 } 5500 5501 /* temporary hack into ksp private data structure */ 5502 #include <petsc/private/kspimpl.h> 5503 5504 #undef __FUNCT__ 5505 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5506 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5507 { 5508 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5509 PC_IS *pcis = (PC_IS*)pc->data; 5510 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5511 MatNullSpace CoarseNullSpace=NULL; 5512 ISLocalToGlobalMapping coarse_islg; 5513 IS coarse_is,*isarray; 5514 PetscInt i,im_active=-1,active_procs=-1; 5515 PetscInt nis,nisdofs,nisneu,nisvert; 5516 PC pc_temp; 5517 PCType coarse_pc_type; 5518 KSPType coarse_ksp_type; 5519 PetscBool multilevel_requested,multilevel_allowed; 5520 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5521 Mat t_coarse_mat_is; 5522 PetscInt ncoarse; 5523 PetscBool compute_vecs = PETSC_FALSE; 5524 PetscScalar *array; 5525 MatReuse coarse_mat_reuse; 5526 PetscBool restr, full_restr, have_void; 5527 PetscErrorCode ierr; 5528 5529 PetscFunctionBegin; 5530 /* Assign global numbering to coarse dofs */ 5531 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5532 PetscInt ocoarse_size; 5533 compute_vecs = PETSC_TRUE; 5534 ocoarse_size = pcbddc->coarse_size; 5535 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5536 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5537 /* see if we can avoid some work */ 5538 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5539 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5540 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5541 PC pc; 5542 PetscBool isbddc; 5543 5544 /* temporary workaround since PCBDDC does not have a reset method so far */ 5545 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5546 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5547 if (isbddc) { 5548 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5549 } else { 5550 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5551 } 5552 coarse_reuse = PETSC_FALSE; 5553 } else { /* we can safely reuse already computed coarse matrix */ 5554 coarse_reuse = PETSC_TRUE; 5555 } 5556 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5557 coarse_reuse = PETSC_FALSE; 5558 } 5559 /* reset any subassembling information */ 5560 if (!coarse_reuse || pcbddc->recompute_topography) { 5561 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5562 } 5563 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5564 coarse_reuse = PETSC_TRUE; 5565 } 5566 /* assemble coarse matrix */ 5567 if (coarse_reuse && pcbddc->coarse_ksp) { 5568 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5569 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5570 coarse_mat_reuse = MAT_REUSE_MATRIX; 5571 } else { 5572 coarse_mat = NULL; 5573 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5574 } 5575 5576 /* creates temporary l2gmap and IS for coarse indexes */ 5577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5578 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5579 5580 /* creates temporary MATIS object for coarse matrix */ 5581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5582 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5583 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5584 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5585 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5586 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5587 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5588 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5589 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5590 5591 /* count "active" (i.e. with positive local size) and "void" processes */ 5592 im_active = !!(pcis->n); 5593 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5594 5595 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5596 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5597 /* full_restr : just use the receivers from the subassembling pattern */ 5598 coarse_mat_is = NULL; 5599 multilevel_allowed = PETSC_FALSE; 5600 multilevel_requested = PETSC_FALSE; 5601 full_restr = PETSC_TRUE; 5602 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5603 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5604 if (multilevel_requested) { 5605 ncoarse = active_procs/pcbddc->coarsening_ratio; 5606 restr = PETSC_FALSE; 5607 full_restr = PETSC_FALSE; 5608 } else { 5609 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5610 restr = PETSC_TRUE; 5611 full_restr = PETSC_TRUE; 5612 } 5613 ncoarse = PetscMax(1,ncoarse); 5614 if (!pcbddc->coarse_subassembling) { 5615 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5616 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5617 PetscInt psum; 5618 PetscMPIInt size; 5619 if (pcbddc->coarse_ksp) psum = 1; 5620 else psum = 0; 5621 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5622 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5623 if (ncoarse < size) have_void = PETSC_TRUE; 5624 } 5625 /* determine if we can go multilevel */ 5626 if (multilevel_requested) { 5627 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5628 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5629 } 5630 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5631 5632 /* dump subassembling pattern */ 5633 if (pcbddc->dbg_flag && multilevel_allowed) { 5634 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5635 } 5636 5637 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5638 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5639 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5640 const PetscInt *idxs; 5641 ISLocalToGlobalMapping tmap; 5642 5643 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5644 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5645 /* allocate space for temporary storage */ 5646 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5647 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5648 /* allocate for IS array */ 5649 nisdofs = pcbddc->n_ISForDofsLocal; 5650 nisneu = !!pcbddc->NeumannBoundariesLocal; 5651 nisvert = 0; /* nisvert is not used */ 5652 nis = nisdofs + nisneu + nisvert; 5653 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5654 /* dofs splitting */ 5655 for (i=0;i<nisdofs;i++) { 5656 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5657 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5658 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5659 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5660 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5661 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5662 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5663 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5664 } 5665 /* neumann boundaries */ 5666 if (pcbddc->NeumannBoundariesLocal) { 5667 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5668 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5669 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5670 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5671 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5672 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5673 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5674 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5675 } 5676 /* free memory */ 5677 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5678 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5679 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5680 } else { 5681 nis = 0; 5682 nisdofs = 0; 5683 nisneu = 0; 5684 nisvert = 0; 5685 isarray = NULL; 5686 } 5687 /* destroy no longer needed map */ 5688 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5689 5690 /* subassemble */ 5691 if (multilevel_allowed) { 5692 PetscBool reuse,reuser; 5693 if (coarse_mat) reuse = PETSC_TRUE; 5694 else reuse = PETSC_FALSE; 5695 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5696 if (reuser) { 5697 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5698 } else { 5699 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5700 } 5701 } else { 5702 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5703 } 5704 if (coarse_mat_is || coarse_mat) { 5705 PetscMPIInt size; 5706 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5707 if (!multilevel_allowed) { 5708 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5709 } else { 5710 Mat A; 5711 5712 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5713 if (coarse_mat_is) { 5714 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5715 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5716 coarse_mat = coarse_mat_is; 5717 } 5718 /* be sure we don't have MatSeqDENSE as local mat */ 5719 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5720 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5721 } 5722 } 5723 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5724 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5725 5726 /* create local to global scatters for coarse problem */ 5727 if (compute_vecs) { 5728 PetscInt lrows; 5729 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5730 if (coarse_mat) { 5731 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5732 } else { 5733 lrows = 0; 5734 } 5735 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5736 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5737 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5738 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5739 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5740 } 5741 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5742 5743 /* set defaults for coarse KSP and PC */ 5744 if (multilevel_allowed) { 5745 coarse_ksp_type = KSPRICHARDSON; 5746 coarse_pc_type = PCBDDC; 5747 } else { 5748 coarse_ksp_type = KSPPREONLY; 5749 coarse_pc_type = PCREDUNDANT; 5750 } 5751 5752 /* print some info if requested */ 5753 if (pcbddc->dbg_flag) { 5754 if (!multilevel_allowed) { 5755 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5756 if (multilevel_requested) { 5757 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5758 } else if (pcbddc->max_levels) { 5759 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5760 } 5761 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5762 } 5763 } 5764 5765 /* create the coarse KSP object only once with defaults */ 5766 if (coarse_mat) { 5767 PetscViewer dbg_viewer = NULL; 5768 if (pcbddc->dbg_flag) { 5769 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5770 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5771 } 5772 if (!pcbddc->coarse_ksp) { 5773 char prefix[256],str_level[16]; 5774 size_t len; 5775 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5776 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5777 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5778 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5779 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5780 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5781 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5782 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5783 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5784 /* prefix */ 5785 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5786 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5787 if (!pcbddc->current_level) { 5788 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5789 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5790 } else { 5791 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5792 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5793 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5794 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5795 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5796 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5797 } 5798 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5799 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5800 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5801 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5802 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5803 /* allow user customization */ 5804 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5805 } 5806 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5807 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5808 if (nisdofs) { 5809 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5810 for (i=0;i<nisdofs;i++) { 5811 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5812 } 5813 } 5814 if (nisneu) { 5815 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5816 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5817 } 5818 if (nisvert) { 5819 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5820 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5821 } 5822 5823 /* get some info after set from options */ 5824 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5825 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5826 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5827 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5828 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5829 isbddc = PETSC_FALSE; 5830 } 5831 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5832 if (isredundant) { 5833 KSP inner_ksp; 5834 PC inner_pc; 5835 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5836 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5837 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5838 } 5839 5840 /* parameters which miss an API */ 5841 if (isbddc) { 5842 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5843 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5844 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5845 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5846 if (pcbddc_coarse->benign_saddle_point) { 5847 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5848 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5849 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5850 } 5851 } 5852 5853 /* propagate symmetry info of coarse matrix */ 5854 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5855 if (pc->pmat->symmetric_set) { 5856 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5857 } 5858 if (pc->pmat->hermitian_set) { 5859 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5860 } 5861 if (pc->pmat->spd_set) { 5862 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5863 } 5864 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5865 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5866 } 5867 /* set operators */ 5868 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5869 if (pcbddc->dbg_flag) { 5870 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5871 } 5872 } 5873 ierr = PetscFree(isarray);CHKERRQ(ierr); 5874 #if 0 5875 { 5876 PetscViewer viewer; 5877 char filename[256]; 5878 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5879 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5880 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5881 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5882 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5883 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5884 } 5885 #endif 5886 5887 /* Compute coarse null space (special handling by BDDC only) */ 5888 #if 0 5889 if (pcbddc->NullSpace) { 5890 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5891 } 5892 #endif 5893 /* hack */ 5894 if (pcbddc->coarse_ksp) { 5895 Vec crhs,csol; 5896 5897 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5898 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5899 if (!csol) { 5900 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5901 } 5902 if (!crhs) { 5903 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5904 } 5905 } 5906 5907 /* compute null space for coarse solver if the benign trick has been requested */ 5908 if (pcbddc->benign_null) { 5909 5910 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5911 for (i=0;i<pcbddc->benign_n;i++) { 5912 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5913 } 5914 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5915 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5916 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5917 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5918 if (coarse_mat) { 5919 Vec nullv; 5920 PetscScalar *array,*array2; 5921 PetscInt nl; 5922 5923 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5924 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5925 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5926 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5927 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5928 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5929 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5930 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5931 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5932 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5933 } 5934 } 5935 5936 if (pcbddc->coarse_ksp) { 5937 PetscBool ispreonly; 5938 5939 if (CoarseNullSpace) { 5940 PetscBool isnull; 5941 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5942 if (0) { 5943 if (isbddc && !pcbddc->benign_saddle_point) { 5944 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5945 } else { 5946 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5947 } 5948 } else { 5949 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5950 } 5951 } 5952 /* setup coarse ksp */ 5953 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5954 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5955 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5956 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5957 KSP check_ksp; 5958 KSPType check_ksp_type; 5959 PC check_pc; 5960 Vec check_vec,coarse_vec; 5961 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5962 PetscInt its; 5963 PetscBool compute_eigs; 5964 PetscReal *eigs_r,*eigs_c; 5965 PetscInt neigs; 5966 const char *prefix; 5967 5968 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5969 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5970 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5971 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5972 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5973 /* prevent from setup unneeded object */ 5974 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 5975 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 5976 if (ispreonly) { 5977 check_ksp_type = KSPPREONLY; 5978 compute_eigs = PETSC_FALSE; 5979 } else { 5980 check_ksp_type = KSPGMRES; 5981 compute_eigs = PETSC_TRUE; 5982 } 5983 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5984 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5985 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5986 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5987 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5988 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5989 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5990 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5991 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5992 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5993 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5994 /* create random vec */ 5995 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 5996 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5997 if (CoarseNullSpace) { 5998 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5999 } 6000 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6001 /* solve coarse problem */ 6002 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6003 if (CoarseNullSpace) { 6004 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 6005 } 6006 /* set eigenvalue estimation if preonly has not been requested */ 6007 if (compute_eigs) { 6008 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6009 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6010 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6011 lambda_max = eigs_r[neigs-1]; 6012 lambda_min = eigs_r[0]; 6013 if (pcbddc->use_coarse_estimates) { 6014 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6015 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6016 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6017 } 6018 } 6019 } 6020 6021 /* check coarse problem residual error */ 6022 if (pcbddc->dbg_flag) { 6023 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6024 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6025 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6026 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6027 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6028 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6029 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6030 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6031 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6032 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6033 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6034 if (CoarseNullSpace) { 6035 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6036 } 6037 if (compute_eigs) { 6038 PetscReal lambda_max_s,lambda_min_s; 6039 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6040 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6041 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6042 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6043 for (i=0;i<neigs;i++) { 6044 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6045 } 6046 } 6047 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6048 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6049 } 6050 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6051 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6052 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6053 if (compute_eigs) { 6054 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6055 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6056 } 6057 } 6058 } 6059 /* print additional info */ 6060 if (pcbddc->dbg_flag) { 6061 /* waits until all processes reaches this point */ 6062 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6064 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6065 } 6066 6067 /* free memory */ 6068 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6069 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6070 PetscFunctionReturn(0); 6071 } 6072 6073 #undef __FUNCT__ 6074 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6075 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6076 { 6077 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6078 PC_IS* pcis = (PC_IS*)pc->data; 6079 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6080 IS subset,subset_mult,subset_n; 6081 PetscInt local_size,coarse_size=0; 6082 PetscInt *local_primal_indices=NULL; 6083 const PetscInt *t_local_primal_indices; 6084 PetscErrorCode ierr; 6085 6086 PetscFunctionBegin; 6087 /* Compute global number of coarse dofs */ 6088 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6089 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6090 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6091 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6093 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6094 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6095 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6096 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6097 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6098 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6099 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6100 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6101 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6102 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6103 6104 /* check numbering */ 6105 if (pcbddc->dbg_flag) { 6106 PetscScalar coarsesum,*array,*array2; 6107 PetscInt i; 6108 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6109 6110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6113 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6114 /* counter */ 6115 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6116 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6117 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6118 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6119 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6120 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6121 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6122 for (i=0;i<pcbddc->local_primal_size;i++) { 6123 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6124 } 6125 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6126 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6127 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6128 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6129 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6130 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6131 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6132 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6133 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6134 for (i=0;i<pcis->n;i++) { 6135 if (array[i] != 0.0 && array[i] != array2[i]) { 6136 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6137 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6138 set_error = PETSC_TRUE; 6139 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6141 } 6142 } 6143 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6144 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6145 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6146 for (i=0;i<pcis->n;i++) { 6147 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6148 } 6149 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6150 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6151 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6152 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6153 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6155 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6156 PetscInt *gidxs; 6157 6158 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6159 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6163 for (i=0;i<pcbddc->local_primal_size;i++) { 6164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6165 } 6166 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6167 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6168 } 6169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6170 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6171 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6172 } 6173 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6174 /* get back data */ 6175 *coarse_size_n = coarse_size; 6176 *local_primal_indices_n = local_primal_indices; 6177 PetscFunctionReturn(0); 6178 } 6179 6180 #undef __FUNCT__ 6181 #define __FUNCT__ "PCBDDCGlobalToLocal" 6182 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6183 { 6184 IS localis_t; 6185 PetscInt i,lsize,*idxs,n; 6186 PetscScalar *vals; 6187 PetscErrorCode ierr; 6188 6189 PetscFunctionBegin; 6190 /* get indices in local ordering exploiting local to global map */ 6191 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6192 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6193 for (i=0;i<lsize;i++) vals[i] = 1.0; 6194 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6195 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6196 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6197 if (idxs) { /* multilevel guard */ 6198 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6199 } 6200 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6201 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6202 ierr = PetscFree(vals);CHKERRQ(ierr); 6203 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6204 /* now compute set in local ordering */ 6205 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6206 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6207 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6208 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6209 for (i=0,lsize=0;i<n;i++) { 6210 if (PetscRealPart(vals[i]) > 0.5) { 6211 lsize++; 6212 } 6213 } 6214 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6215 for (i=0,lsize=0;i<n;i++) { 6216 if (PetscRealPart(vals[i]) > 0.5) { 6217 idxs[lsize++] = i; 6218 } 6219 } 6220 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6221 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6222 *localis = localis_t; 6223 PetscFunctionReturn(0); 6224 } 6225 6226 #undef __FUNCT__ 6227 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6228 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6229 { 6230 PC_IS *pcis=(PC_IS*)pc->data; 6231 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6232 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6233 Mat S_j; 6234 PetscInt *used_xadj,*used_adjncy; 6235 PetscBool free_used_adj; 6236 PetscErrorCode ierr; 6237 6238 PetscFunctionBegin; 6239 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6240 free_used_adj = PETSC_FALSE; 6241 if (pcbddc->sub_schurs_layers == -1) { 6242 used_xadj = NULL; 6243 used_adjncy = NULL; 6244 } else { 6245 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6246 used_xadj = pcbddc->mat_graph->xadj; 6247 used_adjncy = pcbddc->mat_graph->adjncy; 6248 } else if (pcbddc->computed_rowadj) { 6249 used_xadj = pcbddc->mat_graph->xadj; 6250 used_adjncy = pcbddc->mat_graph->adjncy; 6251 } else { 6252 PetscBool flg_row=PETSC_FALSE; 6253 const PetscInt *xadj,*adjncy; 6254 PetscInt nvtxs; 6255 6256 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6257 if (flg_row) { 6258 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6259 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6260 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6261 free_used_adj = PETSC_TRUE; 6262 } else { 6263 pcbddc->sub_schurs_layers = -1; 6264 used_xadj = NULL; 6265 used_adjncy = NULL; 6266 } 6267 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6268 } 6269 } 6270 6271 /* setup sub_schurs data */ 6272 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6273 if (!sub_schurs->schur_explicit) { 6274 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6275 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6276 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6277 } else { 6278 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6279 PetscBool isseqaij,need_change = PETSC_FALSE;; 6280 PetscInt benign_n; 6281 Mat change = NULL; 6282 Vec scaling = NULL; 6283 IS change_primal = NULL; 6284 6285 if (!pcbddc->use_vertices && reuse_solvers) { 6286 PetscInt n_vertices; 6287 6288 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6289 reuse_solvers = (PetscBool)!n_vertices; 6290 } 6291 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6292 if (!isseqaij) { 6293 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6294 if (matis->A == pcbddc->local_mat) { 6295 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6297 } else { 6298 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6299 } 6300 } 6301 if (!pcbddc->benign_change_explicit) { 6302 benign_n = pcbddc->benign_n; 6303 } else { 6304 benign_n = 0; 6305 } 6306 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6307 We need a global reduction to avoid possible deadlocks. 6308 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6309 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6310 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6311 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6312 need_change = (PetscBool)(!need_change); 6313 } 6314 /* If the user defines additional constraints, we import them here. 6315 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6316 if (need_change) { 6317 PC_IS *pcisf; 6318 PC_BDDC *pcbddcf; 6319 PC pcf; 6320 6321 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6322 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6323 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6324 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6325 /* hacks */ 6326 pcisf = (PC_IS*)pcf->data; 6327 pcisf->is_B_local = pcis->is_B_local; 6328 pcisf->vec1_N = pcis->vec1_N; 6329 pcisf->BtoNmap = pcis->BtoNmap; 6330 pcisf->n = pcis->n; 6331 pcisf->n_B = pcis->n_B; 6332 pcbddcf = (PC_BDDC*)pcf->data; 6333 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6334 pcbddcf->mat_graph = pcbddc->mat_graph; 6335 pcbddcf->use_faces = PETSC_TRUE; 6336 pcbddcf->use_change_of_basis = PETSC_TRUE; 6337 pcbddcf->use_change_on_faces = PETSC_TRUE; 6338 pcbddcf->use_qr_single = PETSC_TRUE; 6339 pcbddcf->fake_change = PETSC_TRUE; 6340 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6341 /* store information on primal vertices and change of basis (in local numbering) */ 6342 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6343 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6344 change = pcbddcf->ConstraintMatrix; 6345 pcbddcf->ConstraintMatrix = NULL; 6346 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6347 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6348 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6349 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6350 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6351 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6352 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6353 pcf->ops->destroy = NULL; 6354 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6355 } 6356 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6357 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6358 ierr = MatDestroy(&change);CHKERRQ(ierr); 6359 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6360 } 6361 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6362 6363 /* free adjacency */ 6364 if (free_used_adj) { 6365 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6366 } 6367 PetscFunctionReturn(0); 6368 } 6369 6370 #undef __FUNCT__ 6371 #define __FUNCT__ "PCBDDCInitSubSchurs" 6372 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6373 { 6374 PC_IS *pcis=(PC_IS*)pc->data; 6375 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6376 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6377 PCBDDCGraph graph; 6378 PetscErrorCode ierr; 6379 6380 PetscFunctionBegin; 6381 /* attach interface graph for determining subsets */ 6382 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6383 IS verticesIS,verticescomm; 6384 PetscInt vsize,*idxs; 6385 6386 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6387 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6388 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6389 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6390 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6391 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6392 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6393 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6394 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6395 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6396 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6397 } else { 6398 graph = pcbddc->mat_graph; 6399 } 6400 /* print some info */ 6401 if (pcbddc->dbg_flag) { 6402 IS vertices; 6403 PetscInt nv,nedges,nfaces; 6404 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6405 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6406 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6407 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6408 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6414 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6415 } 6416 6417 /* sub_schurs init */ 6418 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6419 6420 /* free graph struct */ 6421 if (pcbddc->sub_schurs_rebuild) { 6422 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6423 } 6424 PetscFunctionReturn(0); 6425 } 6426 6427 #undef __FUNCT__ 6428 #define __FUNCT__ "PCBDDCCheckOperator" 6429 PetscErrorCode PCBDDCCheckOperator(PC pc) 6430 { 6431 PC_IS *pcis=(PC_IS*)pc->data; 6432 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6433 PetscErrorCode ierr; 6434 6435 PetscFunctionBegin; 6436 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6437 IS zerodiag = NULL; 6438 Mat S_j,B0_B=NULL; 6439 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6440 PetscScalar *p0_check,*array,*array2; 6441 PetscReal norm; 6442 PetscInt i; 6443 6444 /* B0 and B0_B */ 6445 if (zerodiag) { 6446 IS dummy; 6447 6448 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6449 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6450 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6451 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6452 } 6453 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6454 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6455 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6456 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6457 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6458 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6459 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6460 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6461 /* S_j */ 6462 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6463 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6464 6465 /* mimic vector in \widetilde{W}_\Gamma */ 6466 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6467 /* continuous in primal space */ 6468 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6469 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6470 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6471 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6472 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6473 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6474 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6475 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6476 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6477 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6478 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6479 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6480 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6481 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6482 6483 /* assemble rhs for coarse problem */ 6484 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6485 /* local with Schur */ 6486 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6487 if (zerodiag) { 6488 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6489 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6490 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6491 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6492 } 6493 /* sum on primal nodes the local contributions */ 6494 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6495 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6496 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6497 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6498 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6499 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6500 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6501 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6502 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6503 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6504 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6505 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6506 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6507 /* scale primal nodes (BDDC sums contibutions) */ 6508 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6509 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6510 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6511 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6512 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6513 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6514 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6515 /* global: \widetilde{B0}_B w_\Gamma */ 6516 if (zerodiag) { 6517 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6518 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6519 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6520 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6521 } 6522 /* BDDC */ 6523 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6524 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6525 6526 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6527 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6528 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6529 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6530 for (i=0;i<pcbddc->benign_n;i++) { 6531 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6532 } 6533 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6534 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6535 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6536 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6537 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6538 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6539 } 6540 PetscFunctionReturn(0); 6541 } 6542