1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 9 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 10 { 11 PCBDDCBenignMatMult_ctx ctx; 12 PetscErrorCode ierr; 13 PetscBool apply_right,apply_left,reset_x; 14 15 PetscFunctionBegin; 16 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 17 if (transpose) { 18 apply_right = ctx->apply_left; 19 apply_left = ctx->apply_right; 20 } else { 21 apply_right = ctx->apply_right; 22 apply_left = ctx->apply_left; 23 } 24 reset_x = PETSC_FALSE; 25 if (apply_right) { 26 const PetscScalar *ax; 27 PetscInt nl,i; 28 29 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 30 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 31 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 32 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 33 for (i=0;i<ctx->benign_n;i++) { 34 PetscScalar sum,val; 35 const PetscInt *idxs; 36 PetscInt nz,j; 37 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 38 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 39 sum = 0.; 40 if (ctx->apply_p0) { 41 val = ctx->work[idxs[nz-1]]; 42 for (j=0;j<nz-1;j++) { 43 sum += ctx->work[idxs[j]]; 44 ctx->work[idxs[j]] += val; 45 } 46 } else { 47 for (j=0;j<nz-1;j++) { 48 sum += ctx->work[idxs[j]]; 49 } 50 } 51 ctx->work[idxs[nz-1]] -= sum; 52 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 53 } 54 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 55 reset_x = PETSC_TRUE; 56 } 57 if (transpose) { 58 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 59 } else { 60 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 61 } 62 if (reset_x) { 63 ierr = VecResetArray(x);CHKERRQ(ierr); 64 } 65 if (apply_left) { 66 PetscScalar *ay; 67 PetscInt i; 68 69 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 70 for (i=0;i<ctx->benign_n;i++) { 71 PetscScalar sum,val; 72 const PetscInt *idxs; 73 PetscInt nz,j; 74 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 75 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 76 val = -ay[idxs[nz-1]]; 77 if (ctx->apply_p0) { 78 sum = 0.; 79 for (j=0;j<nz-1;j++) { 80 sum += ay[idxs[j]]; 81 ay[idxs[j]] += val; 82 } 83 ay[idxs[nz-1]] += sum; 84 } else { 85 for (j=0;j<nz-1;j++) { 86 ay[idxs[j]] += val; 87 } 88 ay[idxs[nz-1]] = 0.; 89 } 90 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 91 } 92 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 99 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 100 { 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 110 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 111 { 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCBenignShellMat" 121 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 122 { 123 PC_IS *pcis = (PC_IS*)pc->data; 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PCBDDCBenignMatMult_ctx ctx; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!restore) { 130 Mat A_IB,A_BI; 131 PetscScalar *work; 132 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs->reuse_solver; 133 134 if (pcbddc->benign_original_mat) { 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 136 } 137 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 138 PetscFunctionReturn(0); 139 } 140 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 141 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 142 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 143 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 144 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 145 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 146 ierr = PetscNew(&ctx);CHKERRQ(ierr); 147 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 148 ctx->apply_left = PETSC_TRUE; 149 ctx->apply_right = PETSC_FALSE; 150 ctx->apply_p0 = PETSC_FALSE; 151 ctx->benign_n = pcbddc->benign_n; 152 if (reuse) { 153 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 154 ctx->free = PETSC_FALSE; 155 } else { /* TODO: could be optimized for successive solves */ 156 ISLocalToGlobalMapping N_to_D; 157 PetscInt i; 158 159 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 160 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 161 for (i=0;i<pcbddc->benign_n;i++) { 162 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 163 } 164 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 165 ctx->free = PETSC_TRUE; 166 } 167 ctx->A = pcis->A_IB; 168 ctx->work = work; 169 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 170 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 pcis->A_IB = A_IB; 173 174 /* A_BI as A_IB^T */ 175 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 176 pcbddc->benign_original_mat = pcis->A_BI; 177 pcis->A_BI = A_BI; 178 } else { 179 if (!pcbddc->benign_original_mat) { 180 PetscFunctionReturn(0); 181 } 182 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 183 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 184 pcis->A_IB = ctx->A; 185 ctx->A = NULL; 186 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 187 pcis->A_BI = pcbddc->benign_original_mat; 188 pcbddc->benign_original_mat = NULL; 189 if (ctx->free) { 190 PetscInt i; 191 for (i=0;i<ctx->benign_n;i++) { 192 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 193 } 194 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 195 } 196 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 197 ierr = PetscFree(ctx);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* used just in bddc debug mode */ 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCBDDCBenignProject" 205 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 206 { 207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 208 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 209 Mat An; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 214 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 215 if (is1) { 216 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 217 ierr = MatDestroy(&An);CHKERRQ(ierr); 218 } else { 219 *B = An; 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* TODO: add reuse flag */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSeqAIJCompress" 227 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 228 { 229 Mat Bt; 230 PetscScalar *a,*bdata; 231 const PetscInt *ii,*ij; 232 PetscInt m,n,i,nnz,*bii,*bij; 233 PetscBool flg_row; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 238 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 239 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 240 nnz = n; 241 for (i=0;i<ii[n];i++) { 242 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 243 } 244 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 245 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 246 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 247 nnz = 0; 248 bii[0] = 0; 249 for (i=0;i<n;i++) { 250 PetscInt j; 251 for (j=ii[i];j<ii[i+1];j++) { 252 PetscScalar entry = a[j]; 253 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 254 bij[nnz] = ij[j]; 255 bdata[nnz] = entry; 256 nnz++; 257 } 258 } 259 bii[i+1] = nnz; 260 } 261 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 262 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 263 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 264 { 265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 } 269 *B = Bt; 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatDetectDisconnectedComponents" 275 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 276 { 277 Mat B; 278 IS is_dummy,*cc_n; 279 ISLocalToGlobalMapping l2gmap_dummy; 280 PCBDDCGraph graph; 281 PetscInt i,n; 282 PetscInt *xadj,*adjncy; 283 PetscInt *xadj_filtered,*adjncy_filtered; 284 PetscBool flg_row,isseqaij; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 if (!A->rmap->N || !A->cmap->N) { 289 *ncc = 0; 290 *cc = NULL; 291 PetscFunctionReturn(0); 292 } 293 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 294 if (!isseqaij && filter) { 295 PetscBool isseqdense; 296 297 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 298 if (!isseqdense) { 299 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 300 } else { /* TODO: rectangular case and LDA */ 301 PetscScalar *array; 302 PetscReal chop=1.e-6; 303 304 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 305 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 306 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 307 for (i=0;i<n;i++) { 308 PetscInt j; 309 for (j=i+1;j<n;j++) { 310 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 311 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 312 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 313 } 314 } 315 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 316 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 317 } 318 } else { 319 B = A; 320 } 321 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 322 323 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 324 if (filter) { 325 PetscScalar *data; 326 PetscInt j,cum; 327 328 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 329 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 330 cum = 0; 331 for (i=0;i<n;i++) { 332 PetscInt t; 333 334 for (j=xadj[i];j<xadj[i+1];j++) { 335 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 336 continue; 337 } 338 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 339 } 340 t = xadj_filtered[i]; 341 xadj_filtered[i] = cum; 342 cum += t; 343 } 344 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 345 } else { 346 xadj_filtered = NULL; 347 adjncy_filtered = NULL; 348 } 349 350 /* compute local connected components using PCBDDCGraph */ 351 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 353 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 354 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 355 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 357 if (xadj_filtered) { 358 graph->xadj = xadj_filtered; 359 graph->adjncy = adjncy_filtered; 360 } else { 361 graph->xadj = xadj; 362 graph->adjncy = adjncy; 363 } 364 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 365 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 366 /* partial clean up */ 367 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 368 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 369 if (A != B) { 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 } 372 373 /* get back data */ 374 if (ncc) *ncc = graph->ncc; 375 if (cc) { 376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 377 for (i=0;i<graph->ncc;i++) { 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 379 } 380 *cc = cc_n; 381 } 382 /* clean up graph */ 383 graph->xadj = 0; 384 graph->adjncy = 0; 385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCBenignCheck" 391 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 392 { 393 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 394 PC_IS* pcis = (PC_IS*)(pc->data); 395 IS dirIS = NULL; 396 PetscInt i; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 401 if (zerodiag) { 402 Mat A; 403 Vec vec3_N; 404 PetscScalar *vals; 405 const PetscInt *idxs; 406 PetscInt nz,*count; 407 408 /* p0 */ 409 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 410 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 413 for (i=0;i<nz;i++) vals[i] = 1.; 414 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 416 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 417 /* v_I */ 418 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 419 for (i=0;i<nz;i++) vals[i] = 0.; 420 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 422 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 423 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 424 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 426 if (dirIS) { 427 PetscInt n; 428 429 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 430 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 431 for (i=0;i<n;i++) vals[i] = 0.; 432 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 437 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 438 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 439 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 440 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 441 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 442 if (PetscAbsScalar(vals[0]) > 1.e-1) { 443 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 444 } 445 ierr = PetscFree(vals);CHKERRQ(ierr); 446 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 447 448 /* there should not be any pressure dofs lying on the interface */ 449 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 450 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 451 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 452 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 453 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 454 for (i=0;i<nz;i++) { 455 if (count[idxs[i]]) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 457 } 458 } 459 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 460 ierr = PetscFree(count);CHKERRQ(ierr); 461 } 462 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 463 464 /* check PCBDDCBenignGetOrSetP0 */ 465 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 468 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 469 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 470 for (i=0;i<pcbddc->benign_n;i++) { 471 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 472 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 473 } 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 480 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 481 { 482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 483 IS pressures,zerodiag,*zerodiag_subs; 484 PetscInt nz,n; 485 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 491 for (n=0;n<pcbddc->benign_n;n++) { 492 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 495 pcbddc->benign_n = 0; 496 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 497 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 498 Checks if all the pressure dofs in each subdomain have a zero diagonal 499 If not, a change of basis on pressures is not needed 500 since the local Schur complements are SPD 501 */ 502 has_null_pressures = PETSC_TRUE; 503 have_null = PETSC_TRUE; 504 if (pcbddc->n_ISForDofsLocal) { 505 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 506 507 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 512 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 513 if (!sorted) { 514 ierr = ISSort(pressures);CHKERRQ(ierr); 515 } 516 } else { 517 pressures = NULL; 518 } 519 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 520 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 521 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 522 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 523 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 524 if (!sorted) { 525 ierr = ISSort(zerodiag);CHKERRQ(ierr); 526 } 527 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 528 if (!nz) { 529 if (n) have_null = PETSC_FALSE; 530 has_null_pressures = PETSC_FALSE; 531 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 532 } 533 recompute_zerodiag = PETSC_FALSE; 534 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 535 zerodiag_subs = NULL; 536 pcbddc->benign_n = 0; 537 if (has_null_pressures) { 538 IS *subs; 539 PetscInt nsubs,i; 540 541 subs = pcbddc->local_subs; 542 nsubs = pcbddc->n_local_subs; 543 if (nsubs > 1) { 544 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 545 for (i=0;i<nsubs;i++) { 546 ISLocalToGlobalMapping l2g; 547 IS t_zerodiag_subs; 548 PetscInt nl; 549 550 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 551 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 552 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 553 if (nl) { 554 PetscBool valid = PETSC_TRUE; 555 556 if (pressures) { 557 IS t_pressure_subs; 558 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 559 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 560 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 561 } 562 if (valid) { 563 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 564 pcbddc->benign_n++; 565 } else { 566 recompute_zerodiag = PETSC_TRUE; 567 } 568 } 569 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 570 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 571 } 572 } else { /* there's just one subdomain (or zero if they have not been detected */ 573 PetscBool valid = PETSC_TRUE; 574 if (pressures) { 575 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 576 } 577 if (valid) { 578 pcbddc->benign_n = 1; 579 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 580 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 581 zerodiag_subs[0] = zerodiag; 582 } 583 } 584 } 585 586 if (!pcbddc->benign_n) { 587 PetscInt n; 588 589 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 590 recompute_zerodiag = PETSC_FALSE; 591 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 592 if (n) { 593 has_null_pressures = PETSC_FALSE; 594 have_null = PETSC_FALSE; 595 } 596 } 597 598 /* final check for null pressures */ 599 if (zerodiag && pressures) { 600 PetscInt nz,np; 601 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 602 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 603 if (nz != np) have_null = PETSC_FALSE; 604 } 605 606 if (recompute_zerodiag) { 607 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 608 if (pcbddc->benign_n == 1) { 609 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 610 zerodiag = zerodiag_subs[0]; 611 } else { 612 PetscInt i,nzn,*new_idxs; 613 614 nzn = 0; 615 for (i=0;i<pcbddc->benign_n;i++) { 616 PetscInt ns; 617 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 618 nzn += ns; 619 } 620 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 621 nzn = 0; 622 for (i=0;i<pcbddc->benign_n;i++) { 623 PetscInt ns,*idxs; 624 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 625 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 626 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 627 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 628 nzn += ns; 629 } 630 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 631 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 632 } 633 have_null = PETSC_FALSE; 634 } 635 636 /* no-net-flux */ 637 if (pcbddc->benign_compute_nonetflux) { 638 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 639 MatNullSpace near_null_space; 640 ISLocalToGlobalMapping rmap; 641 Vec quad_vec; 642 PetscScalar *pvals; 643 PetscInt i,np,*dummyins; 644 645 /* create vector to hold quadrature weights */ 646 ierr = MatCreateVecs(pc->pmat,&quad_vec,NULL);CHKERRQ(ierr); 647 ierr = VecSet(quad_vec,0.0);CHKERRQ(ierr); 648 ierr = MatGetLocalToGlobalMapping(pc->pmat,&rmap,NULL);CHKERRQ(ierr); 649 ierr = VecSetLocalToGlobalMapping(quad_vec,rmap);CHKERRQ(ierr); 650 651 /* compute B^{(i)T} * 1_p */ 652 np = 0; 653 if (zerodiag) { 654 ierr = ISGetLocalSize(zerodiag,&np);CHKERRQ(ierr); 655 } else if (pressures) { 656 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 657 } 658 ierr = PetscMalloc1(np,&pvals);CHKERRQ(ierr); 659 for (i=0;i<np;i++) pvals[i] = 1.; 660 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 661 if (np) { 662 const PetscInt *pidxs; 663 664 if (zerodiag) { 665 ierr = ISGetIndices(zerodiag,&pidxs);CHKERRQ(ierr); 666 } else if (pressures) { 667 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 668 } 669 ierr = VecSetValues(matis->x,np,pidxs,pvals,INSERT_VALUES);CHKERRQ(ierr); 670 if (zerodiag) { 671 ierr = ISRestoreIndices(zerodiag,&pidxs);CHKERRQ(ierr); 672 } else if (pressures) { 673 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 674 } 675 } 676 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 677 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 678 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 679 ierr = PetscFree(pvals);CHKERRQ(ierr); 680 681 /* decide which of the sharing ranks (per dof) has to insert the values (should just be a matter of having a different orientation) */ 682 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 683 for (i=0;i<matis->sf->nroots;i++) matis->sf_rootdata[i] = -1; 684 for (i=0;i<matis->sf->nleaves;i++) matis->sf_leafdata[i] = PetscGlobalRank; 685 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 686 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_MAX);CHKERRQ(ierr); 687 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 688 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 689 ierr = VecGetArray(matis->y,&pvals);CHKERRQ(ierr); 690 for (i=0;i<matis->sf->nleaves;i++) { 691 if (PetscGlobalRank != matis->sf_leafdata[i]) { 692 pvals[i] = 0.; 693 } 694 } 695 ierr = PetscMalloc1(matis->sf->nleaves,&dummyins);CHKERRQ(ierr); 696 for (i=0;i<matis->sf->nleaves;i++) dummyins[i] = i; 697 ierr = VecSetValuesLocal(quad_vec,matis->sf->nleaves,dummyins,pvals,ADD_VALUES);CHKERRQ(ierr); 698 ierr = VecRestoreArray(matis->y,&pvals);CHKERRQ(ierr); 699 ierr = PetscFree(dummyins);CHKERRQ(ierr); 700 701 /* assembly quadrature vec and attach near null space to pmat */ 702 ierr = VecAssemblyBegin(quad_vec);CHKERRQ(ierr); 703 ierr = VecAssemblyEnd(quad_vec);CHKERRQ(ierr); 704 ierr = VecNormalize(quad_vec,NULL);CHKERRQ(ierr); 705 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,1,&quad_vec,&near_null_space);CHKERRQ(ierr); 706 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 707 ierr = MatSetNearNullSpace(pc->pmat,near_null_space);CHKERRQ(ierr); 708 ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); 709 } 710 711 /* change of basis and p0 dofs */ 712 if (has_null_pressures) { 713 IS zerodiagc; 714 const PetscInt *idxs,*idxsc; 715 PetscInt i,s,*nnz; 716 717 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 718 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 719 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 720 /* local change of basis for pressures */ 721 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 722 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 723 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 724 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 725 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 726 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 727 for (i=0;i<pcbddc->benign_n;i++) { 728 PetscInt nzs,j; 729 730 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 731 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 732 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 733 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 734 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 735 } 736 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 737 ierr = PetscFree(nnz);CHKERRQ(ierr); 738 /* set identity on velocities */ 739 for (i=0;i<n-nz;i++) { 740 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 741 } 742 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 743 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 744 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 745 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 746 /* set change on pressures */ 747 for (s=0;s<pcbddc->benign_n;s++) { 748 PetscScalar *array; 749 PetscInt nzs; 750 751 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 752 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 753 for (i=0;i<nzs-1;i++) { 754 PetscScalar vals[2]; 755 PetscInt cols[2]; 756 757 cols[0] = idxs[i]; 758 cols[1] = idxs[nzs-1]; 759 vals[0] = 1.; 760 vals[1] = 1.; 761 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 762 } 763 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 764 for (i=0;i<nzs-1;i++) array[i] = -1.; 765 array[nzs-1] = 1.; 766 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 767 /* store local idxs for p0 */ 768 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 769 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 770 ierr = PetscFree(array);CHKERRQ(ierr); 771 } 772 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 /* project if needed */ 775 if (pcbddc->benign_change_explicit) { 776 Mat M; 777 778 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 780 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 781 ierr = MatDestroy(&M);CHKERRQ(ierr); 782 } 783 /* store global idxs for p0 */ 784 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 785 } 786 pcbddc->benign_zerodiag_subs = zerodiag_subs; 787 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 788 789 /* determines if the coarse solver will be singular or not */ 790 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 791 /* determines if the problem has subdomains with 0 pressure block */ 792 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 793 *zerodiaglocal = zerodiag; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 799 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 800 { 801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 802 PetscScalar *array; 803 PetscErrorCode ierr; 804 805 PetscFunctionBegin; 806 if (!pcbddc->benign_sf) { 807 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 808 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 809 } 810 if (get) { 811 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 812 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 813 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 814 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 815 } else { 816 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 817 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 818 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 819 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 826 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 827 { 828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 /* TODO: add error checking 833 - avoid nested pop (or push) calls. 834 - cannot push before pop. 835 - cannot call this if pcbddc->local_mat is NULL 836 */ 837 if (!pcbddc->benign_n) { 838 PetscFunctionReturn(0); 839 } 840 if (pop) { 841 if (pcbddc->benign_change_explicit) { 842 IS is_p0; 843 MatReuse reuse; 844 845 /* extract B_0 */ 846 reuse = MAT_INITIAL_MATRIX; 847 if (pcbddc->benign_B0) { 848 reuse = MAT_REUSE_MATRIX; 849 } 850 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 851 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 852 /* remove rows and cols from local problem */ 853 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 854 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 855 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 856 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 857 } else { 858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 859 PetscScalar *vals; 860 PetscInt i,n,*idxs_ins; 861 862 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 863 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 864 if (!pcbddc->benign_B0) { 865 PetscInt *nnz; 866 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 867 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 868 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 869 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 870 for (i=0;i<pcbddc->benign_n;i++) { 871 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 872 nnz[i] = n - nnz[i]; 873 } 874 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 875 ierr = PetscFree(nnz);CHKERRQ(ierr); 876 } 877 878 for (i=0;i<pcbddc->benign_n;i++) { 879 PetscScalar *array; 880 PetscInt *idxs,j,nz,cum; 881 882 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 883 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 884 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 885 for (j=0;j<nz;j++) vals[j] = 1.; 886 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 887 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 888 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 889 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 890 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 891 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 892 cum = 0; 893 for (j=0;j<n;j++) { 894 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 895 vals[cum] = array[j]; 896 idxs_ins[cum] = j; 897 cum++; 898 } 899 } 900 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 901 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 902 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 903 } 904 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 905 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 906 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 907 } 908 } else { /* push */ 909 if (pcbddc->benign_change_explicit) { 910 PetscInt i; 911 912 for (i=0;i<pcbddc->benign_n;i++) { 913 PetscScalar *B0_vals; 914 PetscInt *B0_cols,B0_ncol; 915 916 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 917 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 918 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 919 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 920 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 921 } 922 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 923 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924 } else { 925 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 926 } 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "PCBDDCAdaptiveSelection" 933 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 934 { 935 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 936 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 937 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 938 PetscBLASInt *B_iwork,*B_ifail; 939 PetscScalar *work,lwork; 940 PetscScalar *St,*S,*eigv; 941 PetscScalar *Sarray,*Starray; 942 PetscReal *eigs,thresh; 943 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 944 PetscBool allocated_S_St; 945 #if defined(PETSC_USE_COMPLEX) 946 PetscReal *rwork; 947 #endif 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 952 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 953 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 958 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 if (pcbddc->dbg_flag) { 962 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 963 } 964 965 /* max size of subsets */ 966 mss = 0; 967 for (i=0;i<sub_schurs->n_subs;i++) { 968 PetscInt subset_size; 969 970 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 971 mss = PetscMax(mss,subset_size); 972 } 973 974 /* min/max and threshold */ 975 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 976 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 977 nmax = PetscMax(nmin,nmax); 978 allocated_S_St = PETSC_FALSE; 979 if (nmin) { 980 allocated_S_St = PETSC_TRUE; 981 } 982 983 /* allocate lapack workspace */ 984 cum = cum2 = 0; 985 maxneigs = 0; 986 for (i=0;i<sub_schurs->n_subs;i++) { 987 PetscInt n,subset_size; 988 989 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 990 n = PetscMin(subset_size,nmax); 991 cum += subset_size; 992 cum2 += subset_size*n; 993 maxneigs = PetscMax(maxneigs,n); 994 } 995 if (mss) { 996 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 997 PetscBLASInt B_itype = 1; 998 PetscBLASInt B_N = mss; 999 PetscReal zero = 0.0; 1000 PetscReal eps = 0.0; /* dlamch? */ 1001 1002 B_lwork = -1; 1003 S = NULL; 1004 St = NULL; 1005 eigs = NULL; 1006 eigv = NULL; 1007 B_iwork = NULL; 1008 B_ifail = NULL; 1009 #if defined(PETSC_USE_COMPLEX) 1010 rwork = NULL; 1011 #endif 1012 thresh = 1.0; 1013 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1014 #if defined(PETSC_USE_COMPLEX) 1015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1016 #else 1017 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1018 #endif 1019 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1021 } else { 1022 /* TODO */ 1023 } 1024 } else { 1025 lwork = 0; 1026 } 1027 1028 nv = 0; 1029 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1030 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1031 } 1032 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1033 if (allocated_S_St) { 1034 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1035 } 1036 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1037 #if defined(PETSC_USE_COMPLEX) 1038 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1039 #endif 1040 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1041 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1042 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1043 nv+cum,&pcbddc->adaptive_constraints_idxs, 1044 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1045 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1046 1047 maxneigs = 0; 1048 cum = cumarray = 0; 1049 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1050 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1051 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1052 const PetscInt *idxs; 1053 1054 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1055 for (cum=0;cum<nv;cum++) { 1056 pcbddc->adaptive_constraints_n[cum] = 1; 1057 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1058 pcbddc->adaptive_constraints_data[cum] = 1.0; 1059 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1060 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1061 } 1062 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1063 } 1064 1065 if (mss) { /* multilevel */ 1066 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1067 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1068 } 1069 1070 thresh = pcbddc->adaptive_threshold; 1071 for (i=0;i<sub_schurs->n_subs;i++) { 1072 const PetscInt *idxs; 1073 PetscReal upper,lower; 1074 PetscInt j,subset_size,eigs_start = 0; 1075 PetscBLASInt B_N; 1076 PetscBool same_data = PETSC_FALSE; 1077 1078 if (pcbddc->use_deluxe_scaling) { 1079 upper = PETSC_MAX_REAL; 1080 lower = thresh; 1081 } else { 1082 upper = 1./thresh; 1083 lower = 0.; 1084 } 1085 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1086 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1087 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1088 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1089 if (sub_schurs->is_hermitian) { 1090 PetscInt j,k; 1091 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1092 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1093 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1094 } 1095 for (j=0;j<subset_size;j++) { 1096 for (k=j;k<subset_size;k++) { 1097 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1098 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1099 } 1100 } 1101 } else { 1102 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1103 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1104 } 1105 } else { 1106 S = Sarray + cumarray; 1107 St = Starray + cumarray; 1108 } 1109 /* see if we can save some work */ 1110 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1111 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1112 } 1113 1114 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1115 B_neigs = 0; 1116 } else { 1117 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1118 PetscBLASInt B_itype = 1; 1119 PetscBLASInt B_IL, B_IU; 1120 PetscReal eps = -1.0; /* dlamch? */ 1121 PetscInt nmin_s; 1122 PetscBool compute_range = PETSC_FALSE; 1123 1124 if (pcbddc->dbg_flag) { 1125 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1126 } 1127 1128 compute_range = PETSC_FALSE; 1129 if (thresh > 1.+PETSC_SMALL && !same_data) { 1130 compute_range = PETSC_TRUE; 1131 } 1132 1133 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1134 if (compute_range) { 1135 1136 /* ask for eigenvalues larger than thresh */ 1137 #if defined(PETSC_USE_COMPLEX) 1138 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1139 #else 1140 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1141 #endif 1142 } else if (!same_data) { 1143 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1144 B_IL = 1; 1145 #if defined(PETSC_USE_COMPLEX) 1146 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1147 #else 1148 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1149 #endif 1150 } else { /* same_data is true, so get the adaptive function requested by the user */ 1151 PetscInt k; 1152 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1153 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1154 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1155 nmin = nmax; 1156 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1157 for (k=0;k<nmax;k++) { 1158 eigs[k] = 1./PETSC_SMALL; 1159 eigv[k*(subset_size+1)] = 1.0; 1160 } 1161 } 1162 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1163 if (B_ierr) { 1164 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1165 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1166 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1167 } 1168 1169 if (B_neigs > nmax) { 1170 if (pcbddc->dbg_flag) { 1171 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1172 } 1173 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1174 B_neigs = nmax; 1175 } 1176 1177 nmin_s = PetscMin(nmin,B_N); 1178 if (B_neigs < nmin_s) { 1179 PetscBLASInt B_neigs2; 1180 1181 if (pcbddc->use_deluxe_scaling) { 1182 B_IL = B_N - nmin_s + 1; 1183 B_IU = B_N - B_neigs; 1184 } else { 1185 B_IL = B_neigs + 1; 1186 B_IU = nmin_s; 1187 } 1188 if (pcbddc->dbg_flag) { 1189 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1190 } 1191 if (sub_schurs->is_hermitian) { 1192 PetscInt j; 1193 for (j=0;j<subset_size;j++) { 1194 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1195 } 1196 for (j=0;j<subset_size;j++) { 1197 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 1198 } 1199 } else { 1200 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1201 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1202 } 1203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_COMPLEX) 1205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1206 #else 1207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1208 #endif 1209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1210 B_neigs += B_neigs2; 1211 } 1212 if (B_ierr) { 1213 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1214 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1215 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1216 } 1217 if (pcbddc->dbg_flag) { 1218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1219 for (j=0;j<B_neigs;j++) { 1220 if (eigs[j] == 0.0) { 1221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1222 } else { 1223 if (pcbddc->use_deluxe_scaling) { 1224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1225 } else { 1226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 } else { 1232 /* TODO */ 1233 } 1234 } 1235 /* change the basis back to the original one */ 1236 if (sub_schurs->change) { 1237 Mat change,phi,phit; 1238 1239 if (pcbddc->dbg_flag > 1) { 1240 PetscInt ii; 1241 for (ii=0;ii<B_neigs;ii++) { 1242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1243 for (j=0;j<B_N;j++) { 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1245 } 1246 } 1247 } 1248 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1250 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1251 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1252 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1253 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1254 } 1255 maxneigs = PetscMax(B_neigs,maxneigs); 1256 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1257 if (B_neigs) { 1258 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1259 1260 if (pcbddc->dbg_flag > 1) { 1261 PetscInt ii; 1262 for (ii=0;ii<B_neigs;ii++) { 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1264 for (j=0;j<B_N;j++) { 1265 #if defined(PETSC_USE_COMPLEX) 1266 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1267 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1269 #else 1270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1271 #endif 1272 } 1273 } 1274 } 1275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1276 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1277 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1278 cum++; 1279 } 1280 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1281 /* shift for next computation */ 1282 cumarray += subset_size*subset_size; 1283 } 1284 if (pcbddc->dbg_flag) { 1285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1286 } 1287 1288 if (mss) { 1289 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1290 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1291 /* destroy matrices (junk) */ 1292 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1293 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1294 } 1295 if (allocated_S_St) { 1296 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1299 #if defined(PETSC_USE_COMPLEX) 1300 ierr = PetscFree(rwork);CHKERRQ(ierr); 1301 #endif 1302 if (pcbddc->dbg_flag) { 1303 PetscInt maxneigs_r; 1304 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "PCBDDCSetUpSolvers" 1312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1313 { 1314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1315 PetscScalar *coarse_submat_vals; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1322 1323 /* Setup local neumann solver ksp_R */ 1324 /* PCBDDCSetUpLocalScatters should be called first! */ 1325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1326 1327 /* Change global null space passed in by the user if change of basis has been requested */ 1328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 1329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 1330 } 1331 1332 /* 1333 Setup local correction and local part of coarse basis. 1334 Gives back the dense local part of the coarse matrix in column major ordering 1335 */ 1336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1337 1338 /* Compute total number of coarse nodes and setup coarse solver */ 1339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1340 1341 /* free */ 1342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "PCBDDCResetCustomization" 1348 PetscErrorCode PCBDDCResetCustomization(PC pc) 1349 { 1350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1356 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1357 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 1358 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1359 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1360 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1361 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1362 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1363 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1364 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1365 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "PCBDDCResetTopography" 1371 PetscErrorCode PCBDDCResetTopography(PC pc) 1372 { 1373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1374 PetscInt i; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1380 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1381 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1382 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1384 for (i=0;i<pcbddc->n_local_subs;i++) { 1385 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1388 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "PCBDDCResetSolvers" 1394 PetscErrorCode PCBDDCResetSolvers(PC pc) 1395 { 1396 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1401 if (pcbddc->coarse_phi_B) { 1402 PetscScalar *array; 1403 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1404 ierr = PetscFree(array);CHKERRQ(ierr); 1405 } 1406 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1407 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1409 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1410 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1411 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1412 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1413 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1414 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1415 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1416 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1417 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1418 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1419 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1420 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1421 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1422 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1423 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1424 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1425 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1426 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1427 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1428 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1429 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1430 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1431 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1432 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1433 if (pcbddc->benign_zerodiag_subs) { 1434 PetscInt i; 1435 for (i=0;i<pcbddc->benign_n;i++) { 1436 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1437 } 1438 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1439 } 1440 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1446 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1447 { 1448 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1449 PC_IS *pcis = (PC_IS*)pc->data; 1450 VecType impVecType; 1451 PetscInt n_constraints,n_R,old_size; 1452 PetscErrorCode ierr; 1453 1454 PetscFunctionBegin; 1455 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1456 /* get sizes */ 1457 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1458 n_R = pcis->n - pcbddc->n_vertices; 1459 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1460 /* local work vectors (try to avoid unneeded work)*/ 1461 /* R nodes */ 1462 old_size = -1; 1463 if (pcbddc->vec1_R) { 1464 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1465 } 1466 if (n_R != old_size) { 1467 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1468 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1469 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1470 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1471 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1472 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1473 } 1474 /* local primal dofs */ 1475 old_size = -1; 1476 if (pcbddc->vec1_P) { 1477 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1478 } 1479 if (pcbddc->local_primal_size != old_size) { 1480 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1481 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1482 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1483 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1484 } 1485 /* local explicit constraints */ 1486 old_size = -1; 1487 if (pcbddc->vec1_C) { 1488 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1489 } 1490 if (n_constraints && n_constraints != old_size) { 1491 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1493 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1494 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "PCBDDCSetUpCorrection" 1501 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1502 { 1503 PetscErrorCode ierr; 1504 /* pointers to pcis and pcbddc */ 1505 PC_IS* pcis = (PC_IS*)pc->data; 1506 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1507 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1508 /* submatrices of local problem */ 1509 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1510 /* submatrices of local coarse problem */ 1511 Mat S_VV,S_CV,S_VC,S_CC; 1512 /* working matrices */ 1513 Mat C_CR; 1514 /* additional working stuff */ 1515 PC pc_R; 1516 Mat F; 1517 Vec dummy_vec; 1518 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1519 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1520 PetscScalar *work; 1521 PetscInt *idx_V_B; 1522 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1523 PetscInt i,n_R,n_D,n_B; 1524 1525 /* some shortcuts to scalars */ 1526 PetscScalar one=1.0,m_one=-1.0; 1527 1528 PetscFunctionBegin; 1529 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1531 } 1532 1533 /* Set Non-overlapping dimensions */ 1534 n_vertices = pcbddc->n_vertices; 1535 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1536 n_B = pcis->n_B; 1537 n_D = pcis->n - n_B; 1538 n_R = pcis->n - n_vertices; 1539 1540 /* vertices in boundary numbering */ 1541 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1542 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1543 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1544 1545 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1546 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1548 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1550 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1552 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1554 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1555 1556 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1557 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1560 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1561 lda_rhs = n_R; 1562 need_benign_correction = PETSC_FALSE; 1563 if (isLU || isILU || isCHOL) { 1564 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1565 } else if (sub_schurs->reuse_solver) { 1566 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1567 MatFactorType type; 1568 1569 F = reuse_solver->F; 1570 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1571 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1572 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1573 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1574 } else { 1575 F = NULL; 1576 } 1577 1578 /* allocate workspace */ 1579 n = 0; 1580 if (n_constraints) { 1581 n += lda_rhs*n_constraints; 1582 } 1583 if (n_vertices) { 1584 n = PetscMax(2*lda_rhs*n_vertices,n); 1585 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1586 } 1587 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1588 1589 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1590 dummy_vec = NULL; 1591 if (need_benign_correction && lda_rhs != n_R && F) { 1592 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1593 } 1594 1595 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1596 if (n_constraints) { 1597 Mat M1,M2,M3,C_B; 1598 IS is_aux; 1599 PetscScalar *array,*array2; 1600 1601 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1602 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1603 1604 /* Extract constraints on R nodes: C_{CR} */ 1605 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1607 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1608 1609 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1610 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1611 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1612 for (i=0;i<n_constraints;i++) { 1613 const PetscScalar *row_cmat_values; 1614 const PetscInt *row_cmat_indices; 1615 PetscInt size_of_constraint,j; 1616 1617 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1618 for (j=0;j<size_of_constraint;j++) { 1619 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1620 } 1621 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1622 } 1623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1624 if (F) { 1625 Mat B; 1626 1627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1628 if (need_benign_correction) { 1629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1630 1631 /* rhs is already zero on interior dofs, no need to change the rhs */ 1632 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1633 } 1634 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1635 if (need_benign_correction) { 1636 PetscScalar *marr; 1637 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1638 1639 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1640 if (lda_rhs != n_R) { 1641 for (i=0;i<n_constraints;i++) { 1642 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1643 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1644 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1645 } 1646 } else { 1647 for (i=0;i<n_constraints;i++) { 1648 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1649 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1651 } 1652 } 1653 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1654 } 1655 ierr = MatDestroy(&B);CHKERRQ(ierr); 1656 } else { 1657 PetscScalar *marr; 1658 1659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1660 for (i=0;i<n_constraints;i++) { 1661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1666 } 1667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1668 } 1669 if (!pcbddc->switch_static) { 1670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1673 for (i=0;i<n_constraints;i++) { 1674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1680 } 1681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1683 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1684 } else { 1685 if (lda_rhs != n_R) { 1686 IS dummy; 1687 1688 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1689 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1690 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1691 } else { 1692 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1693 pcbddc->local_auxmat2 = local_auxmat2_R; 1694 } 1695 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1696 } 1697 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1698 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1699 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1700 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1701 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1702 if (isCHOL) { 1703 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1704 } else { 1705 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1706 } 1707 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1708 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1709 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1710 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1711 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1712 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1713 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1714 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1715 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1716 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1717 } 1718 1719 /* Get submatrices from subdomain matrix */ 1720 if (n_vertices) { 1721 IS is_aux; 1722 1723 if (sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1724 IS tis; 1725 1726 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1727 ierr = ISSort(tis);CHKERRQ(ierr); 1728 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1729 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1730 } else { 1731 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1732 } 1733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1735 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1736 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1737 } 1738 1739 /* Matrix of coarse basis functions (local) */ 1740 if (pcbddc->coarse_phi_B) { 1741 PetscInt on_B,on_primal,on_D=n_D; 1742 if (pcbddc->coarse_phi_D) { 1743 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1744 } 1745 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1746 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1747 PetscScalar *marray; 1748 1749 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1750 ierr = PetscFree(marray);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1752 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1753 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1754 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1755 } 1756 } 1757 1758 if (!pcbddc->coarse_phi_B) { 1759 PetscScalar *marray; 1760 1761 n = n_B*pcbddc->local_primal_size; 1762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1763 n += n_D*pcbddc->local_primal_size; 1764 } 1765 if (!pcbddc->symmetric_primal) { 1766 n *= 2; 1767 } 1768 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1770 n = n_B*pcbddc->local_primal_size; 1771 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1773 n += n_D*pcbddc->local_primal_size; 1774 } 1775 if (!pcbddc->symmetric_primal) { 1776 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1777 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1778 n = n_B*pcbddc->local_primal_size; 1779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1780 } 1781 } else { 1782 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1783 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1784 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1786 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1787 } 1788 } 1789 } 1790 1791 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1792 p0_lidx_I = NULL; 1793 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1794 const PetscInt *idxs; 1795 1796 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1797 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1798 for (i=0;i<pcbddc->benign_n;i++) { 1799 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1800 } 1801 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1802 } 1803 1804 /* vertices */ 1805 if (n_vertices) { 1806 1807 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 1808 1809 if (n_R) { 1810 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 1811 PetscBLASInt B_N,B_one = 1; 1812 PetscScalar *x,*y; 1813 PetscBool isseqaij; 1814 1815 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1816 if (need_benign_correction) { 1817 ISLocalToGlobalMapping RtoN; 1818 IS is_p0; 1819 PetscInt *idxs_p0,n; 1820 1821 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 1822 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 1823 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 1824 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 1825 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 1826 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 1827 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 1828 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1829 } 1830 1831 if (lda_rhs == n_R) { 1832 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1833 } else { 1834 PetscScalar *av,*array; 1835 const PetscInt *xadj,*adjncy; 1836 PetscInt n; 1837 PetscBool flg_row; 1838 1839 array = work+lda_rhs*n_vertices; 1840 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1841 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 1842 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1844 for (i=0;i<n;i++) { 1845 PetscInt j; 1846 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1847 } 1848 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1849 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1851 } 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1853 if (need_benign_correction) { 1854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1855 PetscScalar *marr; 1856 1857 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1858 /* need \Phi^T A_RV = (I+L)A_RV, L given by 1859 1860 | 0 0 0 | (V) 1861 L = | 0 0 -1 | (P-p0) 1862 | 0 0 -1 | (p0) 1863 1864 */ 1865 for (i=0;i<reuse_solver->benign_n;i++) { 1866 const PetscScalar *vals; 1867 const PetscInt *idxs,*idxs_zero; 1868 PetscInt n,j,nz; 1869 1870 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1871 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1872 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1873 for (j=0;j<n;j++) { 1874 PetscScalar val = vals[j]; 1875 PetscInt k,col = idxs[j]; 1876 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 1877 } 1878 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1879 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1880 } 1881 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1882 } 1883 if (F) { 1884 /* need to correct the rhs */ 1885 if (need_benign_correction) { 1886 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1887 PetscScalar *marr; 1888 1889 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 1890 if (lda_rhs != n_R) { 1891 for (i=0;i<n_vertices;i++) { 1892 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1893 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1894 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1895 } 1896 } else { 1897 for (i=0;i<n_vertices;i++) { 1898 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1899 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1901 } 1902 } 1903 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 1904 } 1905 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1906 /* need to correct the solution */ 1907 if (need_benign_correction) { 1908 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1909 PetscScalar *marr; 1910 1911 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1912 if (lda_rhs != n_R) { 1913 for (i=0;i<n_vertices;i++) { 1914 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1915 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1917 } 1918 } else { 1919 for (i=0;i<n_vertices;i++) { 1920 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1921 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1923 } 1924 } 1925 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 1926 } 1927 } else { 1928 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1929 for (i=0;i<n_vertices;i++) { 1930 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1931 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1932 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1934 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1935 } 1936 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1937 } 1938 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1939 /* S_VV and S_CV */ 1940 if (n_constraints) { 1941 Mat B; 1942 1943 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1944 for (i=0;i<n_vertices;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1947 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1948 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1951 } 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1953 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1954 ierr = MatDestroy(&B);CHKERRQ(ierr); 1955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1956 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1957 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1960 ierr = MatDestroy(&B);CHKERRQ(ierr); 1961 } 1962 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1963 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1964 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 1965 } 1966 if (lda_rhs != n_R) { 1967 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1969 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1970 } 1971 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1972 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 1973 if (need_benign_correction) { 1974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1975 PetscScalar *marr,*sums; 1976 1977 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 1978 ierr = MatDenseGetArray(S_VVt,&marr); 1979 for (i=0;i<reuse_solver->benign_n;i++) { 1980 const PetscScalar *vals; 1981 const PetscInt *idxs,*idxs_zero; 1982 PetscInt n,j,nz; 1983 1984 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1985 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 1986 for (j=0;j<n_vertices;j++) { 1987 PetscInt k; 1988 sums[j] = 0.; 1989 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 1990 } 1991 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 1992 for (j=0;j<n;j++) { 1993 PetscScalar val = vals[j]; 1994 PetscInt k; 1995 for (k=0;k<n_vertices;k++) { 1996 marr[idxs[j]+k*n_vertices] += val*sums[k]; 1997 } 1998 } 1999 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2000 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2001 } 2002 ierr = PetscFree(sums);CHKERRQ(ierr); 2003 ierr = MatDenseRestoreArray(S_VVt,&marr); 2004 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2005 } 2006 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2008 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2009 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2011 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2012 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2013 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2014 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2015 } else { 2016 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2017 } 2018 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2019 2020 /* coarse basis functions */ 2021 for (i=0;i<n_vertices;i++) { 2022 PetscScalar *y; 2023 2024 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2025 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2026 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2027 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2028 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2029 y[n_B*i+idx_V_B[i]] = 1.0; 2030 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2031 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2032 2033 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2034 PetscInt j; 2035 2036 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2037 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2038 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2039 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2040 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2041 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2042 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2043 } 2044 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2045 } 2046 /* if n_R == 0 the object is not destroyed */ 2047 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2048 } 2049 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2050 2051 if (n_constraints) { 2052 Mat B; 2053 2054 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2055 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2056 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2057 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2058 if (n_vertices) { 2059 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2060 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2061 } else { 2062 Mat S_VCt; 2063 2064 if (lda_rhs != n_R) { 2065 ierr = MatDestroy(&B);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2067 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2068 } 2069 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2070 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2071 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2072 } 2073 } 2074 ierr = MatDestroy(&B);CHKERRQ(ierr); 2075 /* coarse basis functions */ 2076 for (i=0;i<n_constraints;i++) { 2077 PetscScalar *y; 2078 2079 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2080 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2085 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2086 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2087 PetscInt j; 2088 2089 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2090 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2091 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2092 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2094 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2095 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2096 } 2097 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2098 } 2099 } 2100 if (n_constraints) { 2101 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2102 } 2103 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2104 2105 /* coarse matrix entries relative to B_0 */ 2106 if (pcbddc->benign_n) { 2107 Mat B0_B,B0_BPHI; 2108 IS is_dummy; 2109 PetscScalar *data; 2110 PetscInt j; 2111 2112 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2113 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2114 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2115 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2116 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2117 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2118 for (j=0;j<pcbddc->benign_n;j++) { 2119 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2120 for (i=0;i<pcbddc->local_primal_size;i++) { 2121 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2122 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2123 } 2124 } 2125 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2126 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2127 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2128 } 2129 2130 /* compute other basis functions for non-symmetric problems */ 2131 if (!pcbddc->symmetric_primal) { 2132 Mat B_V=NULL,B_C=NULL; 2133 PetscScalar *marray; 2134 2135 if (n_constraints) { 2136 Mat S_CCT,C_CRT; 2137 2138 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2139 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2140 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2141 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2142 if (n_vertices) { 2143 Mat S_VCT; 2144 2145 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2146 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2147 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2148 } 2149 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2150 } 2151 if (n_vertices && n_R) { 2152 PetscScalar *av,*marray; 2153 const PetscInt *xadj,*adjncy; 2154 PetscInt n; 2155 PetscBool flg_row; 2156 2157 /* B_V = B_V - A_VR^T */ 2158 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2159 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2161 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2162 for (i=0;i<n;i++) { 2163 PetscInt j; 2164 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2165 } 2166 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2167 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2168 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2169 } 2170 2171 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2172 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2173 for (i=0;i<n_vertices;i++) { 2174 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2175 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2176 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2177 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2178 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2179 } 2180 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2181 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2182 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2183 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2184 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2185 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2186 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2187 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2188 } 2189 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2190 /* coarse basis functions */ 2191 for (i=0;i<pcbddc->local_primal_size;i++) { 2192 PetscScalar *y; 2193 2194 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2195 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2196 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2197 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2198 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2199 if (i<n_vertices) { 2200 y[n_B*i+idx_V_B[i]] = 1.0; 2201 } 2202 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2204 2205 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2206 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2207 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2208 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2209 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2210 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2211 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2212 } 2213 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2214 } 2215 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2216 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2217 } 2218 /* free memory */ 2219 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2220 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2221 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2222 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2223 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2224 ierr = PetscFree(work);CHKERRQ(ierr); 2225 if (n_vertices) { 2226 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2227 } 2228 if (n_constraints) { 2229 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2230 } 2231 /* Checking coarse_sub_mat and coarse basis functios */ 2232 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2233 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2234 if (pcbddc->dbg_flag) { 2235 Mat coarse_sub_mat; 2236 Mat AUXMAT,TM1,TM2,TM3,TM4; 2237 Mat coarse_phi_D,coarse_phi_B; 2238 Mat coarse_psi_D,coarse_psi_B; 2239 Mat A_II,A_BB,A_IB,A_BI; 2240 Mat C_B,CPHI; 2241 IS is_dummy; 2242 Vec mones; 2243 MatType checkmattype=MATSEQAIJ; 2244 PetscReal real_value; 2245 2246 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2247 Mat A; 2248 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2249 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2250 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2251 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2252 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2253 ierr = MatDestroy(&A);CHKERRQ(ierr); 2254 } else { 2255 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2256 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2257 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2258 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2259 } 2260 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2261 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2262 if (!pcbddc->symmetric_primal) { 2263 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2264 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2265 } 2266 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2267 2268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2271 if (!pcbddc->symmetric_primal) { 2272 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2273 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2274 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2275 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2276 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2277 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2278 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2279 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2280 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2281 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2282 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2283 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2284 } else { 2285 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2286 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2287 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2288 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2289 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2290 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2291 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2292 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2293 } 2294 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2295 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2296 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2297 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2298 if (pcbddc->benign_n) { 2299 Mat B0_B,B0_BPHI; 2300 PetscScalar *data,*data2; 2301 PetscInt j; 2302 2303 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2304 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2305 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2306 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2307 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2308 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2309 for (j=0;j<pcbddc->benign_n;j++) { 2310 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2311 for (i=0;i<pcbddc->local_primal_size;i++) { 2312 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2313 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2314 } 2315 } 2316 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2317 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2318 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2319 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2320 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2321 } 2322 #if 0 2323 { 2324 PetscViewer viewer; 2325 char filename[256]; 2326 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2327 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2328 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2329 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2330 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2331 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2332 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2333 if (save_change) { 2334 Mat phi_B; 2335 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2337 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2338 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2339 } else { 2340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2341 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2342 } 2343 if (pcbddc->coarse_phi_D) { 2344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2346 } 2347 if (pcbddc->coarse_psi_B) { 2348 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2349 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2350 } 2351 if (pcbddc->coarse_psi_D) { 2352 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2353 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2354 } 2355 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2356 } 2357 #endif 2358 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2359 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2360 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2362 2363 /* check constraints */ 2364 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2365 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2366 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2367 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2368 } else { 2369 PetscScalar *data; 2370 Mat tmat; 2371 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2372 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2373 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2374 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2375 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2376 } 2377 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2378 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2379 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2380 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2382 if (!pcbddc->symmetric_primal) { 2383 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2384 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2385 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2386 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2388 } 2389 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2390 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2391 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2392 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2394 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2395 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2396 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2397 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2398 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2399 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2400 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2401 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2402 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2403 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2404 if (!pcbddc->symmetric_primal) { 2405 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2406 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2407 } 2408 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2409 } 2410 /* get back data */ 2411 *coarse_submat_vals_n = coarse_submat_vals; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2417 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2418 { 2419 Mat *work_mat; 2420 IS isrow_s,iscol_s; 2421 PetscBool rsorted,csorted; 2422 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2427 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2428 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2429 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2430 2431 if (!rsorted) { 2432 const PetscInt *idxs; 2433 PetscInt *idxs_sorted,i; 2434 2435 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2436 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2437 for (i=0;i<rsize;i++) { 2438 idxs_perm_r[i] = i; 2439 } 2440 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2441 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2442 for (i=0;i<rsize;i++) { 2443 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2444 } 2445 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2446 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2447 } else { 2448 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2449 isrow_s = isrow; 2450 } 2451 2452 if (!csorted) { 2453 if (isrow == iscol) { 2454 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2455 iscol_s = isrow_s; 2456 } else { 2457 const PetscInt *idxs; 2458 PetscInt *idxs_sorted,i; 2459 2460 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2461 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2462 for (i=0;i<csize;i++) { 2463 idxs_perm_c[i] = i; 2464 } 2465 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2466 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2467 for (i=0;i<csize;i++) { 2468 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2469 } 2470 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2471 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2472 } 2473 } else { 2474 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2475 iscol_s = iscol; 2476 } 2477 2478 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2479 2480 if (!rsorted || !csorted) { 2481 Mat new_mat; 2482 IS is_perm_r,is_perm_c; 2483 2484 if (!rsorted) { 2485 PetscInt *idxs_r,i; 2486 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2487 for (i=0;i<rsize;i++) { 2488 idxs_r[idxs_perm_r[i]] = i; 2489 } 2490 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2491 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2492 } else { 2493 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2494 } 2495 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2496 2497 if (!csorted) { 2498 if (isrow_s == iscol_s) { 2499 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2500 is_perm_c = is_perm_r; 2501 } else { 2502 PetscInt *idxs_c,i; 2503 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2504 for (i=0;i<csize;i++) { 2505 idxs_c[idxs_perm_c[i]] = i; 2506 } 2507 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2508 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2509 } 2510 } else { 2511 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2512 } 2513 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2514 2515 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2516 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2517 work_mat[0] = new_mat; 2518 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2519 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2520 } 2521 2522 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2523 *B = work_mat[0]; 2524 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2525 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2526 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2532 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2533 { 2534 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2536 Mat new_mat; 2537 IS is_local,is_global; 2538 PetscInt local_size; 2539 PetscBool isseqaij; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2544 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2545 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2546 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2547 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2548 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2549 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2550 2551 /* check */ 2552 if (pcbddc->dbg_flag) { 2553 Vec x,x_change; 2554 PetscReal error; 2555 2556 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2557 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2558 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2559 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2560 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2561 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2562 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2563 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2564 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2565 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2568 ierr = VecDestroy(&x);CHKERRQ(ierr); 2569 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2570 } 2571 2572 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2573 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2574 if (isseqaij) { 2575 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2576 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2577 } else { 2578 Mat work_mat; 2579 2580 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2581 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2582 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2583 } 2584 if (matis->A->symmetric_set) { 2585 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2586 #if !defined(PETSC_USE_COMPLEX) 2587 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2588 #endif 2589 } 2590 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2596 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2597 { 2598 PC_IS* pcis = (PC_IS*)(pc->data); 2599 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2600 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2601 PetscInt *idx_R_local=NULL; 2602 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2603 PetscInt vbs,bs; 2604 PetscBT bitmask=NULL; 2605 PetscErrorCode ierr; 2606 2607 PetscFunctionBegin; 2608 /* 2609 No need to setup local scatters if 2610 - primal space is unchanged 2611 AND 2612 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2613 AND 2614 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2615 */ 2616 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2617 PetscFunctionReturn(0); 2618 } 2619 /* destroy old objects */ 2620 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2621 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2622 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2623 /* Set Non-overlapping dimensions */ 2624 n_B = pcis->n_B; 2625 n_D = pcis->n - n_B; 2626 n_vertices = pcbddc->n_vertices; 2627 2628 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2629 2630 /* create auxiliary bitmask and allocate workspace */ 2631 if (!sub_schurs->reuse_solver) { 2632 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2633 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2634 for (i=0;i<n_vertices;i++) { 2635 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2636 } 2637 2638 for (i=0, n_R=0; i<pcis->n; i++) { 2639 if (!PetscBTLookup(bitmask,i)) { 2640 idx_R_local[n_R++] = i; 2641 } 2642 } 2643 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2644 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2645 2646 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2647 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2648 } 2649 2650 /* Block code */ 2651 vbs = 1; 2652 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2653 if (bs>1 && !(n_vertices%bs)) { 2654 PetscBool is_blocked = PETSC_TRUE; 2655 PetscInt *vary; 2656 if (!sub_schurs->reuse_solver) { 2657 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2658 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2659 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2660 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2661 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2662 for (i=0; i<pcis->n/bs; i++) { 2663 if (vary[i]!=0 && vary[i]!=bs) { 2664 is_blocked = PETSC_FALSE; 2665 break; 2666 } 2667 } 2668 ierr = PetscFree(vary);CHKERRQ(ierr); 2669 } else { 2670 /* Verify directly the R set */ 2671 for (i=0; i<n_R/bs; i++) { 2672 PetscInt j,node=idx_R_local[bs*i]; 2673 for (j=1; j<bs; j++) { 2674 if (node != idx_R_local[bs*i+j]-j) { 2675 is_blocked = PETSC_FALSE; 2676 break; 2677 } 2678 } 2679 } 2680 } 2681 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2682 vbs = bs; 2683 for (i=0;i<n_R/vbs;i++) { 2684 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2685 } 2686 } 2687 } 2688 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2689 if (sub_schurs->reuse_solver) { 2690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2691 2692 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2693 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2694 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2695 reuse_solver->is_R = pcbddc->is_R_local; 2696 } else { 2697 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2698 } 2699 2700 /* print some info if requested */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2709 } 2710 2711 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2712 if (!sub_schurs->reuse_solver) { 2713 IS is_aux1,is_aux2; 2714 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2715 2716 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2717 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2719 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2720 for (i=0; i<n_D; i++) { 2721 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2722 } 2723 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2724 for (i=0, j=0; i<n_R; i++) { 2725 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2726 aux_array1[j++] = i; 2727 } 2728 } 2729 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2730 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2731 for (i=0, j=0; i<n_B; i++) { 2732 if (!PetscBTLookup(bitmask,is_indices[i])) { 2733 aux_array2[j++] = i; 2734 } 2735 } 2736 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2738 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2739 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2740 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2741 2742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2743 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2744 for (i=0, j=0; i<n_R; i++) { 2745 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2746 aux_array1[j++] = i; 2747 } 2748 } 2749 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2750 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2752 } 2753 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2754 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2755 } else { 2756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2757 IS tis; 2758 PetscInt schur_size; 2759 2760 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 2761 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2762 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2764 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2765 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2766 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2767 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2768 } 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2776 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2777 { 2778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2779 PC_IS *pcis = (PC_IS*)pc->data; 2780 PC pc_temp; 2781 Mat A_RR; 2782 MatReuse reuse; 2783 PetscScalar m_one = -1.0; 2784 PetscReal value; 2785 PetscInt n_D,n_R; 2786 PetscBool use_exact,use_exact_reduced,issbaij; 2787 PetscErrorCode ierr; 2788 /* prefixes stuff */ 2789 char dir_prefix[256],neu_prefix[256],str_level[16]; 2790 size_t len; 2791 2792 PetscFunctionBegin; 2793 2794 /* compute prefixes */ 2795 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2796 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2797 if (!pcbddc->current_level) { 2798 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2799 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2800 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2801 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2802 } else { 2803 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2804 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2805 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2806 len -= 15; /* remove "pc_bddc_coarse_" */ 2807 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2808 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2809 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2810 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2811 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2812 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2813 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2814 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2815 } 2816 2817 /* DIRICHLET PROBLEM */ 2818 if (dirichlet) { 2819 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2820 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2821 if (!sub_schurs->reuse_solver) { 2822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 2823 } 2824 if (pcbddc->dbg_flag) { 2825 Mat A_IIn; 2826 2827 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 2828 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 2829 pcis->A_II = A_IIn; 2830 } 2831 } 2832 if (pcbddc->local_mat->symmetric_set) { 2833 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2834 } 2835 /* Matrix for Dirichlet problem is pcis->A_II */ 2836 n_D = pcis->n - pcis->n_B; 2837 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2838 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2839 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2840 /* default */ 2841 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2842 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2843 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2844 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2845 if (issbaij) { 2846 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2847 } else { 2848 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2849 } 2850 /* Allow user's customization */ 2851 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2852 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2853 } 2854 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2855 if (sub_schurs->reuse_solver) { 2856 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2857 2858 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 2859 } 2860 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2861 if (!n_D) { 2862 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2863 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2864 } 2865 /* Set Up KSP for Dirichlet problem of BDDC */ 2866 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2867 /* set ksp_D into pcis data */ 2868 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2869 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2870 pcis->ksp_D = pcbddc->ksp_D; 2871 } 2872 2873 /* NEUMANN PROBLEM */ 2874 A_RR = 0; 2875 if (neumann) { 2876 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2877 PetscInt ibs,mbs; 2878 PetscBool issbaij; 2879 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2880 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2881 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2882 if (pcbddc->ksp_R) { /* already created ksp */ 2883 PetscInt nn_R; 2884 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2885 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2886 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2887 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2888 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2889 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2890 reuse = MAT_INITIAL_MATRIX; 2891 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2892 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2893 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2894 reuse = MAT_INITIAL_MATRIX; 2895 } else { /* safe to reuse the matrix */ 2896 reuse = MAT_REUSE_MATRIX; 2897 } 2898 } 2899 /* last check */ 2900 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2901 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2902 reuse = MAT_INITIAL_MATRIX; 2903 } 2904 } else { /* first time, so we need to create the matrix */ 2905 reuse = MAT_INITIAL_MATRIX; 2906 } 2907 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 2908 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2909 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2910 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2911 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2912 if (matis->A == pcbddc->local_mat) { 2913 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2914 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2915 } else { 2916 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2917 } 2918 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2919 if (matis->A == pcbddc->local_mat) { 2920 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2921 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2922 } else { 2923 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2924 } 2925 } 2926 /* extract A_RR */ 2927 if (sub_schurs->reuse_solver) { 2928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2929 2930 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 2931 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2932 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 2933 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 2934 } else { 2935 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2936 } 2937 } else { 2938 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2939 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2940 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2941 } 2942 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 2943 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2944 } 2945 if (pcbddc->local_mat->symmetric_set) { 2946 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2947 } 2948 if (!pcbddc->ksp_R) { /* create object if not present */ 2949 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2950 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2951 /* default */ 2952 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2953 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2954 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2955 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2956 if (issbaij) { 2957 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2958 } else { 2959 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2960 } 2961 /* Allow user's customization */ 2962 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2963 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2964 } 2965 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2966 if (!n_R) { 2967 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2968 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2969 } 2970 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2971 /* Reuse solver if it is present */ 2972 if (sub_schurs->reuse_solver) { 2973 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2974 2975 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 2976 } 2977 /* Set Up KSP for Neumann problem of BDDC */ 2978 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2979 } 2980 2981 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2982 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2987 } 2988 if (dirichlet) { /* Dirichlet */ 2989 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2990 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2991 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2992 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2993 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2994 /* need to be adapted? */ 2995 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2996 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2997 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2998 /* print info */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 3004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 3005 } 3006 } 3007 if (neumann) { /* Neumann */ 3008 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3009 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3010 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3011 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3012 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3013 /* need to be adapted? */ 3014 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 3015 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3016 /* print info */ 3017 if (pcbddc->dbg_flag) { 3018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3019 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3020 } 3021 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 3022 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 3023 } 3024 } 3025 } 3026 /* free Neumann problem's matrix */ 3027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3033 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3034 { 3035 PetscErrorCode ierr; 3036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3037 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3038 3039 PetscFunctionBegin; 3040 if (!sub_schurs->reuse_solver) { 3041 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3042 } 3043 if (!pcbddc->switch_static) { 3044 if (applytranspose && pcbddc->local_auxmat1) { 3045 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3046 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3047 } 3048 if (!sub_schurs->reuse_solver) { 3049 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3050 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3051 } else { 3052 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3053 3054 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 } 3057 } else { 3058 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3060 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3061 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3062 if (applytranspose && pcbddc->local_auxmat1) { 3063 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3064 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3065 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3066 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3067 } 3068 } 3069 if (!sub_schurs->reuse_solver || pcbddc->switch_static) { 3070 if (applytranspose) { 3071 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3072 } else { 3073 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3074 } 3075 } else { 3076 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3077 3078 if (applytranspose) { 3079 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3080 } else { 3081 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3082 } 3083 } 3084 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3085 if (!pcbddc->switch_static) { 3086 if (!sub_schurs->reuse_solver) { 3087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 } else { 3090 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3091 3092 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3094 } 3095 if (!applytranspose && pcbddc->local_auxmat1) { 3096 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3097 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3098 } 3099 } else { 3100 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 if (!applytranspose && pcbddc->local_auxmat1) { 3105 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3106 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3107 } 3108 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3111 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3112 } 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3119 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3120 { 3121 PetscErrorCode ierr; 3122 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3123 PC_IS* pcis = (PC_IS*) (pc->data); 3124 const PetscScalar zero = 0.0; 3125 3126 PetscFunctionBegin; 3127 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3128 if (!pcbddc->benign_apply_coarse_only) { 3129 if (applytranspose) { 3130 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3131 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3132 } else { 3133 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3134 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3135 } 3136 } else { 3137 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3138 } 3139 3140 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3141 if (pcbddc->benign_n) { 3142 PetscScalar *array; 3143 PetscInt j; 3144 3145 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3146 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3147 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3148 } 3149 3150 /* start communications from local primal nodes to rhs of coarse solver */ 3151 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3152 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3153 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3154 3155 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3156 /* TODO remove null space when doing multilevel */ 3157 if (pcbddc->coarse_ksp) { 3158 Mat coarse_mat; 3159 Vec rhs,sol; 3160 MatNullSpace nullsp; 3161 PetscBool isbddc = PETSC_FALSE; 3162 3163 if (pcbddc->benign_have_null) { 3164 PC coarse_pc; 3165 3166 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3167 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3168 /* we need to propagate to coarser levels the need for a possible benign correction */ 3169 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3170 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3171 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3172 } 3173 } 3174 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3175 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3176 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3177 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3178 if (nullsp) { 3179 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3180 } 3181 if (applytranspose) { 3182 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3183 } else { 3184 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3185 } 3186 /* we don't the benign correction at coarser levels anymore */ 3187 if (pcbddc->benign_have_null && isbddc) { 3188 PC coarse_pc; 3189 PC_BDDC* coarsepcbddc; 3190 3191 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3192 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3193 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3194 } 3195 if (nullsp) { 3196 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3197 } 3198 } 3199 3200 /* Local solution on R nodes */ 3201 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3202 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3203 } 3204 /* communications from coarse sol to local primal nodes */ 3205 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3206 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3207 3208 /* Sum contributions from the two levels */ 3209 if (!pcbddc->benign_apply_coarse_only) { 3210 if (applytranspose) { 3211 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3212 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3213 } else { 3214 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3215 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3216 } 3217 /* store p0 */ 3218 if (pcbddc->benign_n) { 3219 PetscScalar *array; 3220 PetscInt j; 3221 3222 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3223 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3224 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3225 } 3226 } else { /* expand the coarse solution */ 3227 if (applytranspose) { 3228 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3229 } else { 3230 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3231 } 3232 } 3233 PetscFunctionReturn(0); 3234 } 3235 3236 #undef __FUNCT__ 3237 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3238 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3239 { 3240 PetscErrorCode ierr; 3241 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3242 PetscScalar *array; 3243 Vec from,to; 3244 3245 PetscFunctionBegin; 3246 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3247 from = pcbddc->coarse_vec; 3248 to = pcbddc->vec1_P; 3249 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3250 Vec tvec; 3251 3252 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3253 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3254 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3255 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3256 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3257 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3258 } 3259 } else { /* from local to global -> put data in coarse right hand side */ 3260 from = pcbddc->vec1_P; 3261 to = pcbddc->coarse_vec; 3262 } 3263 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3269 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3270 { 3271 PetscErrorCode ierr; 3272 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3273 PetscScalar *array; 3274 Vec from,to; 3275 3276 PetscFunctionBegin; 3277 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3278 from = pcbddc->coarse_vec; 3279 to = pcbddc->vec1_P; 3280 } else { /* from local to global -> put data in coarse right hand side */ 3281 from = pcbddc->vec1_P; 3282 to = pcbddc->coarse_vec; 3283 } 3284 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3285 if (smode == SCATTER_FORWARD) { 3286 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3287 Vec tvec; 3288 3289 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3290 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3291 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3292 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3293 } 3294 } else { 3295 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3296 ierr = VecResetArray(from);CHKERRQ(ierr); 3297 } 3298 } 3299 PetscFunctionReturn(0); 3300 } 3301 3302 /* uncomment for testing purposes */ 3303 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3304 #undef __FUNCT__ 3305 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3306 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3307 { 3308 PetscErrorCode ierr; 3309 PC_IS* pcis = (PC_IS*)(pc->data); 3310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3311 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3312 /* one and zero */ 3313 PetscScalar one=1.0,zero=0.0; 3314 /* space to store constraints and their local indices */ 3315 PetscScalar *constraints_data; 3316 PetscInt *constraints_idxs,*constraints_idxs_B; 3317 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3318 PetscInt *constraints_n; 3319 /* iterators */ 3320 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3321 /* BLAS integers */ 3322 PetscBLASInt lwork,lierr; 3323 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3324 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3325 /* reuse */ 3326 PetscInt olocal_primal_size,olocal_primal_size_cc; 3327 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3328 /* change of basis */ 3329 PetscBool qr_needed; 3330 PetscBT change_basis,qr_needed_idx; 3331 /* auxiliary stuff */ 3332 PetscInt *nnz,*is_indices; 3333 PetscInt ncc; 3334 /* some quantities */ 3335 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3336 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3337 3338 PetscFunctionBegin; 3339 /* Destroy Mat objects computed previously */ 3340 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3341 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3342 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3343 /* save info on constraints from previous setup (if any) */ 3344 olocal_primal_size = pcbddc->local_primal_size; 3345 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3346 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3347 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3348 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3349 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3350 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3351 3352 if (!pcbddc->adaptive_selection) { 3353 IS ISForVertices,*ISForFaces,*ISForEdges; 3354 MatNullSpace nearnullsp; 3355 const Vec *nearnullvecs; 3356 Vec *localnearnullsp; 3357 PetscScalar *array; 3358 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3359 PetscBool nnsp_has_cnst; 3360 /* LAPACK working arrays for SVD or POD */ 3361 PetscBool skip_lapack,boolforchange; 3362 PetscScalar *work; 3363 PetscReal *singular_vals; 3364 #if defined(PETSC_USE_COMPLEX) 3365 PetscReal *rwork; 3366 #endif 3367 #if defined(PETSC_MISSING_LAPACK_GESVD) 3368 PetscScalar *temp_basis,*correlation_mat; 3369 #else 3370 PetscBLASInt dummy_int=1; 3371 PetscScalar dummy_scalar=1.; 3372 #endif 3373 3374 /* Get index sets for faces, edges and vertices from graph */ 3375 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3376 /* print some info */ 3377 if (pcbddc->dbg_flag) { 3378 PetscInt nv; 3379 3380 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3381 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3382 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3388 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3389 } 3390 3391 /* free unneeded index sets */ 3392 if (!pcbddc->use_vertices) { 3393 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3394 } 3395 if (!pcbddc->use_edges) { 3396 for (i=0;i<n_ISForEdges;i++) { 3397 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3398 } 3399 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3400 n_ISForEdges = 0; 3401 } 3402 if (!pcbddc->use_faces) { 3403 for (i=0;i<n_ISForFaces;i++) { 3404 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3405 } 3406 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3407 n_ISForFaces = 0; 3408 } 3409 3410 #if defined(PETSC_USE_DEBUG) 3411 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 3412 Also use_change_of_basis should be consistent among processors */ 3413 if (pcbddc->NullSpace) { 3414 PetscBool tbool[2],gbool[2]; 3415 3416 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 3417 pcbddc->use_change_of_basis = PETSC_TRUE; 3418 if (!ISForEdges) { 3419 pcbddc->use_change_on_faces = PETSC_TRUE; 3420 } 3421 } 3422 tbool[0] = pcbddc->use_change_of_basis; 3423 tbool[1] = pcbddc->use_change_on_faces; 3424 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3425 pcbddc->use_change_of_basis = gbool[0]; 3426 pcbddc->use_change_on_faces = gbool[1]; 3427 } 3428 #endif 3429 3430 /* check if near null space is attached to global mat */ 3431 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3432 if (nearnullsp) { 3433 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3434 /* remove any stored info */ 3435 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3436 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3437 /* store information for BDDC solver reuse */ 3438 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3439 pcbddc->onearnullspace = nearnullsp; 3440 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3441 for (i=0;i<nnsp_size;i++) { 3442 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3443 } 3444 } else { /* if near null space is not provided BDDC uses constants by default */ 3445 nnsp_size = 0; 3446 nnsp_has_cnst = PETSC_TRUE; 3447 } 3448 /* get max number of constraints on a single cc */ 3449 max_constraints = nnsp_size; 3450 if (nnsp_has_cnst) max_constraints++; 3451 3452 /* 3453 Evaluate maximum storage size needed by the procedure 3454 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3455 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3456 There can be multiple constraints per connected component 3457 */ 3458 n_vertices = 0; 3459 if (ISForVertices) { 3460 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3461 } 3462 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3463 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3464 3465 total_counts = n_ISForFaces+n_ISForEdges; 3466 total_counts *= max_constraints; 3467 total_counts += n_vertices; 3468 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3469 3470 total_counts = 0; 3471 max_size_of_constraint = 0; 3472 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3473 IS used_is; 3474 if (i<n_ISForEdges) { 3475 used_is = ISForEdges[i]; 3476 } else { 3477 used_is = ISForFaces[i-n_ISForEdges]; 3478 } 3479 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3480 total_counts += j; 3481 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3482 } 3483 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3484 3485 /* get local part of global near null space vectors */ 3486 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3487 for (k=0;k<nnsp_size;k++) { 3488 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3489 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3490 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3491 } 3492 3493 /* whether or not to skip lapack calls */ 3494 skip_lapack = PETSC_TRUE; 3495 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3496 3497 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3498 if (!skip_lapack) { 3499 PetscScalar temp_work; 3500 3501 #if defined(PETSC_MISSING_LAPACK_GESVD) 3502 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3503 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3504 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3505 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3506 #if defined(PETSC_USE_COMPLEX) 3507 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3508 #endif 3509 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3510 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3511 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3512 lwork = -1; 3513 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3514 #if !defined(PETSC_USE_COMPLEX) 3515 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3516 #else 3517 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3518 #endif 3519 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3520 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3521 #else /* on missing GESVD */ 3522 /* SVD */ 3523 PetscInt max_n,min_n; 3524 max_n = max_size_of_constraint; 3525 min_n = max_constraints; 3526 if (max_size_of_constraint < max_constraints) { 3527 min_n = max_size_of_constraint; 3528 max_n = max_constraints; 3529 } 3530 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3531 #if defined(PETSC_USE_COMPLEX) 3532 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3533 #endif 3534 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3535 lwork = -1; 3536 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3537 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3538 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3539 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3540 #if !defined(PETSC_USE_COMPLEX) 3541 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3542 #else 3543 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3544 #endif 3545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3546 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3547 #endif /* on missing GESVD */ 3548 /* Allocate optimal workspace */ 3549 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3550 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3551 } 3552 /* Now we can loop on constraining sets */ 3553 total_counts = 0; 3554 constraints_idxs_ptr[0] = 0; 3555 constraints_data_ptr[0] = 0; 3556 /* vertices */ 3557 if (n_vertices) { 3558 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3559 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3560 for (i=0;i<n_vertices;i++) { 3561 constraints_n[total_counts] = 1; 3562 constraints_data[total_counts] = 1.0; 3563 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3564 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3565 total_counts++; 3566 } 3567 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3568 n_vertices = total_counts; 3569 } 3570 3571 /* edges and faces */ 3572 total_counts_cc = total_counts; 3573 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3574 IS used_is; 3575 PetscBool idxs_copied = PETSC_FALSE; 3576 3577 if (ncc<n_ISForEdges) { 3578 used_is = ISForEdges[ncc]; 3579 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3580 } else { 3581 used_is = ISForFaces[ncc-n_ISForEdges]; 3582 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3583 } 3584 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3585 3586 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3587 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3588 /* change of basis should not be performed on local periodic nodes */ 3589 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3590 if (nnsp_has_cnst) { 3591 PetscScalar quad_value; 3592 3593 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3594 idxs_copied = PETSC_TRUE; 3595 3596 if (!pcbddc->use_nnsp_true) { 3597 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3598 } else { 3599 quad_value = 1.0; 3600 } 3601 for (j=0;j<size_of_constraint;j++) { 3602 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3603 } 3604 temp_constraints++; 3605 total_counts++; 3606 } 3607 for (k=0;k<nnsp_size;k++) { 3608 PetscReal real_value; 3609 PetscScalar *ptr_to_data; 3610 3611 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3612 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3613 for (j=0;j<size_of_constraint;j++) { 3614 ptr_to_data[j] = array[is_indices[j]]; 3615 } 3616 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3617 /* check if array is null on the connected component */ 3618 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3619 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3620 if (real_value > 0.0) { /* keep indices and values */ 3621 temp_constraints++; 3622 total_counts++; 3623 if (!idxs_copied) { 3624 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3625 idxs_copied = PETSC_TRUE; 3626 } 3627 } 3628 } 3629 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3630 valid_constraints = temp_constraints; 3631 if (!pcbddc->use_nnsp_true && temp_constraints) { 3632 if (temp_constraints == 1) { /* just normalize the constraint */ 3633 PetscScalar norm,*ptr_to_data; 3634 3635 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3636 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3637 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3638 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3639 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3640 } else { /* perform SVD */ 3641 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3642 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3643 3644 #if defined(PETSC_MISSING_LAPACK_GESVD) 3645 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3646 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3647 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3648 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3649 from that computed using LAPACKgesvd 3650 -> This is due to a different computation of eigenvectors in LAPACKheev 3651 -> The quality of the POD-computed basis will be the same */ 3652 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3653 /* Store upper triangular part of correlation matrix */ 3654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3655 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3656 for (j=0;j<temp_constraints;j++) { 3657 for (k=0;k<j+1;k++) { 3658 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3659 } 3660 } 3661 /* compute eigenvalues and eigenvectors of correlation matrix */ 3662 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3663 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3664 #if !defined(PETSC_USE_COMPLEX) 3665 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3666 #else 3667 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3668 #endif 3669 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3670 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3671 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3672 j = 0; 3673 while (j < temp_constraints && singular_vals[j] < tol) j++; 3674 total_counts = total_counts-j; 3675 valid_constraints = temp_constraints-j; 3676 /* scale and copy POD basis into used quadrature memory */ 3677 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3678 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3679 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3681 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3683 if (j<temp_constraints) { 3684 PetscInt ii; 3685 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3686 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3687 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3688 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3689 for (k=0;k<temp_constraints-j;k++) { 3690 for (ii=0;ii<size_of_constraint;ii++) { 3691 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3692 } 3693 } 3694 } 3695 #else /* on missing GESVD */ 3696 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3697 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3699 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3700 #if !defined(PETSC_USE_COMPLEX) 3701 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3702 #else 3703 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3704 #endif 3705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3706 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3707 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3708 k = temp_constraints; 3709 if (k > size_of_constraint) k = size_of_constraint; 3710 j = 0; 3711 while (j < k && singular_vals[k-j-1] < tol) j++; 3712 valid_constraints = k-j; 3713 total_counts = total_counts-temp_constraints+valid_constraints; 3714 #endif /* on missing GESVD */ 3715 } 3716 } 3717 /* update pointers information */ 3718 if (valid_constraints) { 3719 constraints_n[total_counts_cc] = valid_constraints; 3720 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3721 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3722 /* set change_of_basis flag */ 3723 if (boolforchange) { 3724 PetscBTSet(change_basis,total_counts_cc); 3725 } 3726 total_counts_cc++; 3727 } 3728 } 3729 /* free workspace */ 3730 if (!skip_lapack) { 3731 ierr = PetscFree(work);CHKERRQ(ierr); 3732 #if defined(PETSC_USE_COMPLEX) 3733 ierr = PetscFree(rwork);CHKERRQ(ierr); 3734 #endif 3735 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3736 #if defined(PETSC_MISSING_LAPACK_GESVD) 3737 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3738 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3739 #endif 3740 } 3741 for (k=0;k<nnsp_size;k++) { 3742 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3743 } 3744 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3745 /* free index sets of faces, edges and vertices */ 3746 for (i=0;i<n_ISForFaces;i++) { 3747 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3748 } 3749 if (n_ISForFaces) { 3750 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3751 } 3752 for (i=0;i<n_ISForEdges;i++) { 3753 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3754 } 3755 if (n_ISForEdges) { 3756 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3757 } 3758 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3759 } else { 3760 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3761 3762 total_counts = 0; 3763 n_vertices = 0; 3764 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3765 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3766 } 3767 max_constraints = 0; 3768 total_counts_cc = 0; 3769 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3770 total_counts += pcbddc->adaptive_constraints_n[i]; 3771 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3772 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3773 } 3774 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3775 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3776 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3777 constraints_data = pcbddc->adaptive_constraints_data; 3778 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3779 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3780 total_counts_cc = 0; 3781 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3782 if (pcbddc->adaptive_constraints_n[i]) { 3783 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3784 } 3785 } 3786 #if 0 3787 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3788 for (i=0;i<total_counts_cc;i++) { 3789 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3790 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3791 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3792 printf(" %d",constraints_idxs[j]); 3793 } 3794 printf("\n"); 3795 printf("number of cc: %d\n",constraints_n[i]); 3796 } 3797 for (i=0;i<n_vertices;i++) { 3798 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3799 } 3800 for (i=0;i<sub_schurs->n_subs;i++) { 3801 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3802 } 3803 #endif 3804 3805 max_size_of_constraint = 0; 3806 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3807 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3808 /* Change of basis */ 3809 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3810 if (pcbddc->use_change_of_basis) { 3811 for (i=0;i<sub_schurs->n_subs;i++) { 3812 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3813 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3814 } 3815 } 3816 } 3817 } 3818 pcbddc->local_primal_size = total_counts; 3819 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3820 3821 /* map constraints_idxs in boundary numbering */ 3822 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3823 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 3824 3825 /* Create constraint matrix */ 3826 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3827 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3828 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3829 3830 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3831 /* determine if a QR strategy is needed for change of basis */ 3832 qr_needed = PETSC_FALSE; 3833 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3834 total_primal_vertices=0; 3835 pcbddc->local_primal_size_cc = 0; 3836 for (i=0;i<total_counts_cc;i++) { 3837 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3838 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 3839 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3840 pcbddc->local_primal_size_cc += 1; 3841 } else if (PetscBTLookup(change_basis,i)) { 3842 for (k=0;k<constraints_n[i];k++) { 3843 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3844 } 3845 pcbddc->local_primal_size_cc += constraints_n[i]; 3846 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 3847 PetscBTSet(qr_needed_idx,i); 3848 qr_needed = PETSC_TRUE; 3849 } 3850 } else { 3851 pcbddc->local_primal_size_cc += 1; 3852 } 3853 } 3854 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3855 pcbddc->n_vertices = total_primal_vertices; 3856 /* permute indices in order to have a sorted set of vertices */ 3857 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3858 3859 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3860 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3861 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3862 3863 /* nonzero structure of constraint matrix */ 3864 /* and get reference dof for local constraints */ 3865 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3866 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3867 3868 j = total_primal_vertices; 3869 total_counts = total_primal_vertices; 3870 cum = total_primal_vertices; 3871 for (i=n_vertices;i<total_counts_cc;i++) { 3872 if (!PetscBTLookup(change_basis,i)) { 3873 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3874 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3875 cum++; 3876 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3877 for (k=0;k<constraints_n[i];k++) { 3878 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3879 nnz[j+k] = size_of_constraint; 3880 } 3881 j += constraints_n[i]; 3882 } 3883 } 3884 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3885 ierr = PetscFree(nnz);CHKERRQ(ierr); 3886 3887 /* set values in constraint matrix */ 3888 for (i=0;i<total_primal_vertices;i++) { 3889 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3890 } 3891 total_counts = total_primal_vertices; 3892 for (i=n_vertices;i<total_counts_cc;i++) { 3893 if (!PetscBTLookup(change_basis,i)) { 3894 PetscInt *cols; 3895 3896 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3897 cols = constraints_idxs+constraints_idxs_ptr[i]; 3898 for (k=0;k<constraints_n[i];k++) { 3899 PetscInt row = total_counts+k; 3900 PetscScalar *vals; 3901 3902 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3903 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3904 } 3905 total_counts += constraints_n[i]; 3906 } 3907 } 3908 /* assembling */ 3909 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3910 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3911 3912 /* 3913 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3914 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3915 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 3916 */ 3917 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3918 if (pcbddc->use_change_of_basis) { 3919 /* dual and primal dofs on a single cc */ 3920 PetscInt dual_dofs,primal_dofs; 3921 /* working stuff for GEQRF */ 3922 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3923 PetscBLASInt lqr_work; 3924 /* working stuff for UNGQR */ 3925 PetscScalar *gqr_work,lgqr_work_t; 3926 PetscBLASInt lgqr_work; 3927 /* working stuff for TRTRS */ 3928 PetscScalar *trs_rhs; 3929 PetscBLASInt Blas_NRHS; 3930 /* pointers for values insertion into change of basis matrix */ 3931 PetscInt *start_rows,*start_cols; 3932 PetscScalar *start_vals; 3933 /* working stuff for values insertion */ 3934 PetscBT is_primal; 3935 PetscInt *aux_primal_numbering_B; 3936 /* matrix sizes */ 3937 PetscInt global_size,local_size; 3938 /* temporary change of basis */ 3939 Mat localChangeOfBasisMatrix; 3940 /* extra space for debugging */ 3941 PetscScalar *dbg_work; 3942 3943 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3944 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3945 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3946 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3947 /* nonzeros for local mat */ 3948 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3949 if (!pcbddc->benign_change || pcbddc->fake_change) { 3950 for (i=0;i<pcis->n;i++) nnz[i]=1; 3951 } else { 3952 const PetscInt *ii; 3953 PetscInt n; 3954 PetscBool flg_row; 3955 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3956 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 3957 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 3958 } 3959 for (i=n_vertices;i<total_counts_cc;i++) { 3960 if (PetscBTLookup(change_basis,i)) { 3961 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3962 if (PetscBTLookup(qr_needed_idx,i)) { 3963 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3964 } else { 3965 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3966 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3967 } 3968 } 3969 } 3970 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3971 ierr = PetscFree(nnz);CHKERRQ(ierr); 3972 /* Set interior change in the matrix */ 3973 if (!pcbddc->benign_change || pcbddc->fake_change) { 3974 for (i=0;i<pcis->n;i++) { 3975 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3976 } 3977 } else { 3978 const PetscInt *ii,*jj; 3979 PetscScalar *aa; 3980 PetscInt n; 3981 PetscBool flg_row; 3982 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3983 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3984 for (i=0;i<n;i++) { 3985 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 3986 } 3987 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 3988 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 3989 } 3990 3991 if (pcbddc->dbg_flag) { 3992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3994 } 3995 3996 3997 /* Now we loop on the constraints which need a change of basis */ 3998 /* 3999 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4000 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4001 4002 Basic blocks of change of basis matrix T computed by 4003 4004 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4005 4006 | 1 0 ... 0 s_1/S | 4007 | 0 1 ... 0 s_2/S | 4008 | ... | 4009 | 0 ... 1 s_{n-1}/S | 4010 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4011 4012 with S = \sum_{i=1}^n s_i^2 4013 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4014 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4015 4016 - QR decomposition of constraints otherwise 4017 */ 4018 if (qr_needed) { 4019 /* space to store Q */ 4020 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4021 /* first we issue queries for optimal work */ 4022 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4023 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4024 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4025 lqr_work = -1; 4026 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4027 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4028 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4029 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4030 lgqr_work = -1; 4031 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4032 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4033 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4034 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4035 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4036 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4037 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4038 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4039 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4040 /* array to store scaling factors for reflectors */ 4041 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4042 /* array to store rhs and solution of triangular solver */ 4043 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4044 /* allocating workspace for check */ 4045 if (pcbddc->dbg_flag) { 4046 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4047 } 4048 } 4049 /* array to store whether a node is primal or not */ 4050 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4051 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4052 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4053 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4054 for (i=0;i<total_primal_vertices;i++) { 4055 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4056 } 4057 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4058 4059 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4060 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4061 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4062 if (PetscBTLookup(change_basis,total_counts)) { 4063 /* get constraint info */ 4064 primal_dofs = constraints_n[total_counts]; 4065 dual_dofs = size_of_constraint-primal_dofs; 4066 4067 if (pcbddc->dbg_flag) { 4068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4069 } 4070 4071 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4072 4073 /* copy quadrature constraints for change of basis check */ 4074 if (pcbddc->dbg_flag) { 4075 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4076 } 4077 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4078 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4079 4080 /* compute QR decomposition of constraints */ 4081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4082 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4083 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4084 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4085 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4086 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4087 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4088 4089 /* explictly compute R^-T */ 4090 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4091 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4092 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4095 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4097 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4098 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4099 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4100 4101 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4102 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4104 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4107 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4108 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4109 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4110 4111 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4112 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4113 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4115 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4116 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4117 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4118 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4119 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4121 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4123 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4124 4125 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4126 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4127 /* insert cols for primal dofs */ 4128 for (j=0;j<primal_dofs;j++) { 4129 start_vals = &qr_basis[j*size_of_constraint]; 4130 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4131 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4132 } 4133 /* insert cols for dual dofs */ 4134 for (j=0,k=0;j<dual_dofs;k++) { 4135 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4136 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4137 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4138 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4139 j++; 4140 } 4141 } 4142 4143 /* check change of basis */ 4144 if (pcbddc->dbg_flag) { 4145 PetscInt ii,jj; 4146 PetscBool valid_qr=PETSC_TRUE; 4147 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4148 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4150 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4151 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4152 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4153 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4154 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4155 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4156 for (jj=0;jj<size_of_constraint;jj++) { 4157 for (ii=0;ii<primal_dofs;ii++) { 4158 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4159 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4160 } 4161 } 4162 if (!valid_qr) { 4163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4164 for (jj=0;jj<size_of_constraint;jj++) { 4165 for (ii=0;ii<primal_dofs;ii++) { 4166 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4167 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4168 } 4169 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4170 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4171 } 4172 } 4173 } 4174 } else { 4175 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4176 } 4177 } 4178 } else { /* simple transformation block */ 4179 PetscInt row,col; 4180 PetscScalar val,norm; 4181 4182 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4183 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4184 for (j=0;j<size_of_constraint;j++) { 4185 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4186 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4187 if (!PetscBTLookup(is_primal,row_B)) { 4188 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4189 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4190 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4191 } else { 4192 for (k=0;k<size_of_constraint;k++) { 4193 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4194 if (row != col) { 4195 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4196 } else { 4197 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4198 } 4199 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4200 } 4201 } 4202 } 4203 if (pcbddc->dbg_flag) { 4204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4205 } 4206 } 4207 } else { 4208 if (pcbddc->dbg_flag) { 4209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4210 } 4211 } 4212 } 4213 4214 /* free workspace */ 4215 if (qr_needed) { 4216 if (pcbddc->dbg_flag) { 4217 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4218 } 4219 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4220 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4221 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4222 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4223 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4224 } 4225 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4226 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4227 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4228 4229 /* assembling of global change of variable */ 4230 if (!pcbddc->fake_change) { 4231 Mat tmat; 4232 PetscInt bs; 4233 4234 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4235 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4236 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4237 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4238 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4239 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4240 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4241 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4242 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4243 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4244 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4245 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4246 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4247 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4248 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4249 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4250 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4251 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4252 4253 /* check */ 4254 if (pcbddc->dbg_flag) { 4255 PetscReal error; 4256 Vec x,x_change; 4257 4258 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4259 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4260 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4261 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4262 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4263 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4264 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4265 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4266 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4267 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4268 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4269 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4272 ierr = VecDestroy(&x);CHKERRQ(ierr); 4273 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4274 } 4275 /* adapt sub_schurs computed (if any) */ 4276 if (pcbddc->use_deluxe_scaling) { 4277 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4278 4279 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4280 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4281 } 4282 if (sub_schurs->S_Ej_all) { 4283 Mat S_new,tmat; 4284 IS is_all_N,is_V_Sall = NULL; 4285 4286 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4287 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4288 if (pcbddc->deluxe_zerorows) { 4289 ISLocalToGlobalMapping NtoSall; 4290 IS is_V; 4291 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4292 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4293 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4294 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4295 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4296 } 4297 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4298 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4299 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4300 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4301 if (pcbddc->deluxe_zerorows) { 4302 const PetscScalar *array; 4303 const PetscInt *idxs_V,*idxs_all; 4304 PetscInt i,n_V; 4305 4306 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4307 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4308 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4309 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4310 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4311 for (i=0;i<n_V;i++) { 4312 PetscScalar val; 4313 PetscInt idx; 4314 4315 idx = idxs_V[i]; 4316 val = array[idxs_all[idxs_V[i]]]; 4317 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4318 } 4319 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4320 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4321 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4322 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4323 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4324 } 4325 sub_schurs->S_Ej_all = S_new; 4326 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4327 if (sub_schurs->sum_S_Ej_all) { 4328 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4330 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4331 if (pcbddc->deluxe_zerorows) { 4332 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4333 } 4334 sub_schurs->sum_S_Ej_all = S_new; 4335 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4336 } 4337 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4338 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4339 } 4340 /* destroy any change of basis context in sub_schurs */ 4341 if (sub_schurs->change) { 4342 PetscInt i; 4343 4344 for (i=0;i<sub_schurs->n_subs;i++) { 4345 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4346 } 4347 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4348 } 4349 } 4350 if (pcbddc->switch_static) { /* need to save the local change */ 4351 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4352 } else { 4353 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4354 } 4355 /* determine if any process has changed the pressures locally */ 4356 pcbddc->change_interior = pcbddc->benign_have_null; 4357 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4358 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4359 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4360 pcbddc->use_qr_single = qr_needed; 4361 } 4362 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4363 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4364 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4365 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4366 } else { 4367 Mat benign_global = NULL; 4368 if (pcbddc->benign_have_null) { 4369 Mat tmat; 4370 4371 pcbddc->change_interior = PETSC_TRUE; 4372 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4373 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4374 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4375 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4376 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4377 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4378 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4379 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4380 if (pcbddc->benign_change) { 4381 Mat M; 4382 4383 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4384 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4385 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4386 ierr = MatDestroy(&M);CHKERRQ(ierr); 4387 } else { 4388 Mat eye; 4389 PetscScalar *array; 4390 4391 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4392 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4393 for (i=0;i<pcis->n;i++) { 4394 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4395 } 4396 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4397 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4398 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4399 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4400 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4401 } 4402 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4403 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4404 } 4405 if (pcbddc->user_ChangeOfBasisMatrix) { 4406 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4407 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4408 } else if (pcbddc->benign_have_null) { 4409 pcbddc->ChangeOfBasisMatrix = benign_global; 4410 } 4411 } 4412 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4413 IS is_global; 4414 const PetscInt *gidxs; 4415 4416 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4417 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4418 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4419 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4420 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4421 } 4422 } 4423 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4424 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4425 } 4426 4427 if (!pcbddc->fake_change) { 4428 /* add pressure dofs to set of primal nodes for numbering purposes */ 4429 for (i=0;i<pcbddc->benign_n;i++) { 4430 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4431 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4432 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4433 pcbddc->local_primal_size_cc++; 4434 pcbddc->local_primal_size++; 4435 } 4436 4437 /* check if a new primal space has been introduced (also take into account benign trick) */ 4438 pcbddc->new_primal_space_local = PETSC_TRUE; 4439 if (olocal_primal_size == pcbddc->local_primal_size) { 4440 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4441 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4442 if (!pcbddc->new_primal_space_local) { 4443 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4444 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4445 } 4446 } 4447 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4448 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4449 } 4450 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4451 4452 /* flush dbg viewer */ 4453 if (pcbddc->dbg_flag) { 4454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4455 } 4456 4457 /* free workspace */ 4458 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4459 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4460 if (!pcbddc->adaptive_selection) { 4461 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4462 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4463 } else { 4464 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4465 pcbddc->adaptive_constraints_idxs_ptr, 4466 pcbddc->adaptive_constraints_data_ptr, 4467 pcbddc->adaptive_constraints_idxs, 4468 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4469 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4470 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4471 } 4472 PetscFunctionReturn(0); 4473 } 4474 4475 #undef __FUNCT__ 4476 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4477 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4478 { 4479 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4480 PC_IS *pcis = (PC_IS*)pc->data; 4481 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4482 PetscInt ierr,i,N; 4483 4484 PetscFunctionBegin; 4485 /* Reset previously computed graph */ 4486 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4487 /* Init local Graph struct */ 4488 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4489 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4490 4491 /* Check validity of the csr graph passed in by the user */ 4492 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4493 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4494 } 4495 4496 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4497 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4498 PetscInt *xadj,*adjncy; 4499 PetscInt nvtxs; 4500 PetscBool flg_row=PETSC_FALSE; 4501 4502 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4503 if (flg_row) { 4504 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4505 pcbddc->computed_rowadj = PETSC_TRUE; 4506 } 4507 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4508 } 4509 if (pcbddc->dbg_flag) { 4510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4511 } 4512 4513 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 4514 if (pcbddc->user_provided_isfordofs) { 4515 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 4516 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4517 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4518 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4519 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 4520 } 4521 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 4522 pcbddc->n_ISForDofs = 0; 4523 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 4524 } 4525 } else { 4526 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 4527 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 4529 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 4530 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 4531 } 4532 } 4533 } 4534 4535 /* Setup of Graph */ 4536 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 4537 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 4538 } 4539 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 4540 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 4541 } 4542 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 4543 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4544 } 4545 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4546 4547 /* attach info on disconnected subdomains if present */ 4548 if (pcbddc->n_local_subs) { 4549 PetscInt *local_subs; 4550 4551 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4552 for (i=0;i<pcbddc->n_local_subs;i++) { 4553 const PetscInt *idxs; 4554 PetscInt nl,j; 4555 4556 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4557 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4558 for (j=0;j<nl;j++) { 4559 local_subs[idxs[j]] = i; 4560 } 4561 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4562 } 4563 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4564 pcbddc->mat_graph->local_subs = local_subs; 4565 } 4566 4567 /* Graph's connected components analysis */ 4568 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4573 #undef __FUNCT__ 4574 #define __FUNCT__ "PCBDDCSubsetNumbering" 4575 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4576 { 4577 PetscSF sf; 4578 PetscLayout map; 4579 const PetscInt *idxs; 4580 PetscInt *leaf_data,*root_data,*gidxs; 4581 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4582 PetscInt n_n,nlocals,start,first_index; 4583 PetscMPIInt commsize; 4584 PetscBool first_found; 4585 PetscErrorCode ierr; 4586 4587 PetscFunctionBegin; 4588 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4589 if (subset_mult) { 4590 PetscCheckSameComm(subset,1,subset_mult,2); 4591 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4592 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4593 } 4594 /* create workspace layout for computing global indices of subset */ 4595 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4596 lbounds[0] = lbounds[1] = 0; 4597 for (i=0;i<n;i++) { 4598 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4599 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4600 } 4601 lbounds[0] = -lbounds[0]; 4602 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4603 gbounds[0] = -gbounds[0]; 4604 N = gbounds[1] - gbounds[0] + 1; 4605 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4606 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4607 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4608 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4609 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4610 4611 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4612 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4613 if (subset_mult) { 4614 const PetscInt* idxs_mult; 4615 4616 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4617 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4618 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4619 } else { 4620 for (i=0;i<n;i++) leaf_data[i] = 1; 4621 } 4622 /* local size of new subset */ 4623 n_n = 0; 4624 for (i=0;i<n;i++) n_n += leaf_data[i]; 4625 4626 /* global indexes in layout */ 4627 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4628 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4629 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4630 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4631 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4632 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4633 4634 /* reduce from leaves to roots */ 4635 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4636 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4637 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4638 4639 /* count indexes in local part of layout */ 4640 nlocals = 0; 4641 first_index = -1; 4642 first_found = PETSC_FALSE; 4643 for (i=0;i<Nl;i++) { 4644 if (!first_found && root_data[i]) { 4645 first_found = PETSC_TRUE; 4646 first_index = i; 4647 } 4648 nlocals += root_data[i]; 4649 } 4650 4651 /* cumulative of number of indexes and size of subset without holes */ 4652 #if defined(PETSC_HAVE_MPI_EXSCAN) 4653 start = 0; 4654 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4655 #else 4656 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4657 start = start-nlocals; 4658 #endif 4659 4660 if (N_n) { /* compute total size of new subset if requested */ 4661 *N_n = start + nlocals; 4662 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4663 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4664 } 4665 4666 /* adapt root data with cumulative */ 4667 if (first_found) { 4668 PetscInt old_index; 4669 4670 root_data[first_index] += start; 4671 old_index = first_index; 4672 for (i=first_index+1;i<Nl;i++) { 4673 if (root_data[i]) { 4674 root_data[i] += root_data[old_index]; 4675 old_index = i; 4676 } 4677 } 4678 } 4679 4680 /* from roots to leaves */ 4681 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4682 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4683 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4684 4685 /* create new IS with global indexes without holes */ 4686 if (subset_mult) { 4687 const PetscInt* idxs_mult; 4688 PetscInt cum; 4689 4690 cum = 0; 4691 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4692 for (i=0;i<n;i++) { 4693 PetscInt j; 4694 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4695 } 4696 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4697 } else { 4698 for (i=0;i<n;i++) { 4699 gidxs[i] = leaf_data[i]-1; 4700 } 4701 } 4702 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4703 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4704 PetscFunctionReturn(0); 4705 } 4706 4707 #undef __FUNCT__ 4708 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4709 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4710 { 4711 PetscInt i,j; 4712 PetscScalar *alphas; 4713 PetscErrorCode ierr; 4714 4715 PetscFunctionBegin; 4716 /* this implements stabilized Gram-Schmidt */ 4717 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4718 for (i=0;i<n;i++) { 4719 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4720 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4721 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4722 } 4723 ierr = PetscFree(alphas);CHKERRQ(ierr); 4724 PetscFunctionReturn(0); 4725 } 4726 4727 #undef __FUNCT__ 4728 #define __FUNCT__ "MatISGetSubassemblingPattern" 4729 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4730 { 4731 Mat A; 4732 PetscInt n_neighs,*neighs,*n_shared,**shared; 4733 PetscMPIInt size,rank,color; 4734 PetscInt *xadj,*adjncy; 4735 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4736 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4737 PetscInt void_procs,*procs_candidates = NULL; 4738 PetscInt xadj_count, *count; 4739 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4740 PetscSubcomm psubcomm; 4741 MPI_Comm subcomm; 4742 PetscErrorCode ierr; 4743 4744 PetscFunctionBegin; 4745 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4746 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4747 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4748 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4749 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4750 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4751 4752 if (have_void) *have_void = PETSC_FALSE; 4753 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4754 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4755 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4756 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4757 im_active = !!(n); 4758 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4759 void_procs = size - active_procs; 4760 /* get ranks of of non-active processes in mat communicator */ 4761 if (void_procs) { 4762 PetscInt ncand; 4763 4764 if (have_void) *have_void = PETSC_TRUE; 4765 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4766 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4767 for (i=0,ncand=0;i<size;i++) { 4768 if (!procs_candidates[i]) { 4769 procs_candidates[ncand++] = i; 4770 } 4771 } 4772 /* force n_subdomains to be not greater that the number of non-active processes */ 4773 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4774 } 4775 4776 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4777 if (active_procs < *n_subdomains) { 4778 PetscInt issize,isidx; 4779 if (im_active) { 4780 issize = 1; 4781 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4782 isidx = procs_candidates[rank]; 4783 } else { 4784 isidx = rank; 4785 } 4786 } else { 4787 issize = 0; 4788 isidx = -1; 4789 } 4790 *n_subdomains = active_procs; 4791 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4792 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4793 PetscFunctionReturn(0); 4794 } 4795 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4796 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4797 threshold = PetscMax(threshold,2); 4798 4799 /* Get info on mapping */ 4800 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4801 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4802 4803 /* build local CSR graph of subdomains' connectivity */ 4804 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4805 xadj[0] = 0; 4806 xadj[1] = PetscMax(n_neighs-1,0); 4807 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4808 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4809 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4810 for (i=1;i<n_neighs;i++) 4811 for (j=0;j<n_shared[i];j++) 4812 count[shared[i][j]] += 1; 4813 4814 xadj_count = 0; 4815 for (i=1;i<n_neighs;i++) { 4816 for (j=0;j<n_shared[i];j++) { 4817 if (count[shared[i][j]] < threshold) { 4818 adjncy[xadj_count] = neighs[i]; 4819 adjncy_wgt[xadj_count] = n_shared[i]; 4820 xadj_count++; 4821 break; 4822 } 4823 } 4824 } 4825 xadj[1] = xadj_count; 4826 ierr = PetscFree(count);CHKERRQ(ierr); 4827 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4828 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4829 4830 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4831 4832 /* Restrict work on active processes only */ 4833 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4834 if (void_procs) { 4835 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4836 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4837 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4838 subcomm = PetscSubcommChild(psubcomm); 4839 } else { 4840 psubcomm = NULL; 4841 subcomm = PetscObjectComm((PetscObject)mat); 4842 } 4843 4844 v_wgt = NULL; 4845 if (!color) { 4846 ierr = PetscFree(xadj);CHKERRQ(ierr); 4847 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4848 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4849 } else { 4850 Mat subdomain_adj; 4851 IS new_ranks,new_ranks_contig; 4852 MatPartitioning partitioner; 4853 PetscInt rstart=0,rend=0; 4854 PetscInt *is_indices,*oldranks; 4855 PetscMPIInt size; 4856 PetscBool aggregate; 4857 4858 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4859 if (void_procs) { 4860 PetscInt prank = rank; 4861 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4862 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4863 for (i=0;i<xadj[1];i++) { 4864 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4865 } 4866 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4867 } else { 4868 oldranks = NULL; 4869 } 4870 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4871 if (aggregate) { /* TODO: all this part could be made more efficient */ 4872 PetscInt lrows,row,ncols,*cols; 4873 PetscMPIInt nrank; 4874 PetscScalar *vals; 4875 4876 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 4877 lrows = 0; 4878 if (nrank<redprocs) { 4879 lrows = size/redprocs; 4880 if (nrank<size%redprocs) lrows++; 4881 } 4882 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4883 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4884 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4885 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4886 row = nrank; 4887 ncols = xadj[1]-xadj[0]; 4888 cols = adjncy; 4889 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4890 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4891 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4892 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4893 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4894 ierr = PetscFree(xadj);CHKERRQ(ierr); 4895 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4896 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4897 ierr = PetscFree(vals);CHKERRQ(ierr); 4898 if (use_vwgt) { 4899 Vec v; 4900 const PetscScalar *array; 4901 PetscInt nl; 4902 4903 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 4904 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 4905 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 4906 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 4907 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 4908 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 4909 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 4910 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 4911 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 4912 ierr = VecDestroy(&v);CHKERRQ(ierr); 4913 } 4914 } else { 4915 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4916 if (use_vwgt) { 4917 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4918 v_wgt[0] = local_size; 4919 } 4920 } 4921 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4922 4923 /* Partition */ 4924 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 4925 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4926 if (v_wgt) { 4927 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4928 } 4929 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 4930 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 4931 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4932 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4933 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4934 4935 /* renumber new_ranks to avoid "holes" in new set of processors */ 4936 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4937 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4938 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4939 if (!aggregate) { 4940 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4941 #if defined(PETSC_USE_DEBUG) 4942 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4943 #endif 4944 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 4945 } else if (oldranks) { 4946 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4947 } else { 4948 ranks_send_to_idx[0] = is_indices[0]; 4949 } 4950 } else { 4951 PetscInt idxs[1]; 4952 PetscMPIInt tag; 4953 MPI_Request *reqs; 4954 4955 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4956 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4957 for (i=rstart;i<rend;i++) { 4958 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 4959 } 4960 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 4961 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4962 ierr = PetscFree(reqs);CHKERRQ(ierr); 4963 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4964 #if defined(PETSC_USE_DEBUG) 4965 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 4966 #endif 4967 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 4968 } else if (oldranks) { 4969 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4970 } else { 4971 ranks_send_to_idx[0] = idxs[0]; 4972 } 4973 } 4974 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4975 /* clean up */ 4976 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4977 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4978 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4979 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4980 } 4981 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 4982 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4983 4984 /* assemble parallel IS for sends */ 4985 i = 1; 4986 if (!color) i=0; 4987 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 4988 PetscFunctionReturn(0); 4989 } 4990 4991 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4992 4993 #undef __FUNCT__ 4994 #define __FUNCT__ "MatISSubassemble" 4995 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4996 { 4997 Mat local_mat; 4998 IS is_sends_internal; 4999 PetscInt rows,cols,new_local_rows; 5000 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 5001 PetscBool ismatis,isdense,newisdense,destroy_mat; 5002 ISLocalToGlobalMapping l2gmap; 5003 PetscInt* l2gmap_indices; 5004 const PetscInt* is_indices; 5005 MatType new_local_type; 5006 /* buffers */ 5007 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5008 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5009 PetscInt *recv_buffer_idxs_local; 5010 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5011 /* MPI */ 5012 MPI_Comm comm,comm_n; 5013 PetscSubcomm subcomm; 5014 PetscMPIInt n_sends,n_recvs,commsize; 5015 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5016 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5017 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 5018 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 5019 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 5020 PetscErrorCode ierr; 5021 5022 PetscFunctionBegin; 5023 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5024 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5025 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5026 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5027 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5028 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5029 PetscValidLogicalCollectiveBool(mat,reuse,6); 5030 PetscValidLogicalCollectiveInt(mat,nis,8); 5031 5032 /* further checks */ 5033 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5034 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5035 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5036 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5037 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5038 if (reuse && *mat_n) { 5039 PetscInt mrows,mcols,mnrows,mncols; 5040 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5041 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5042 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5043 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5044 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5045 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5046 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5047 } 5048 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5049 PetscValidLogicalCollectiveInt(mat,bs,0); 5050 5051 /* prepare IS for sending if not provided */ 5052 if (!is_sends) { 5053 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5054 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5055 } else { 5056 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5057 is_sends_internal = is_sends; 5058 } 5059 5060 /* get comm */ 5061 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5062 5063 /* compute number of sends */ 5064 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5065 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5066 5067 /* compute number of receives */ 5068 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5069 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5070 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5071 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5072 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5073 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5074 ierr = PetscFree(iflags);CHKERRQ(ierr); 5075 5076 /* restrict comm if requested */ 5077 subcomm = 0; 5078 destroy_mat = PETSC_FALSE; 5079 if (restrict_comm) { 5080 PetscMPIInt color,subcommsize; 5081 5082 color = 0; 5083 if (restrict_full) { 5084 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5085 } else { 5086 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5087 } 5088 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5089 subcommsize = commsize - subcommsize; 5090 /* check if reuse has been requested */ 5091 if (reuse) { 5092 if (*mat_n) { 5093 PetscMPIInt subcommsize2; 5094 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5095 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5096 comm_n = PetscObjectComm((PetscObject)*mat_n); 5097 } else { 5098 comm_n = PETSC_COMM_SELF; 5099 } 5100 } else { /* MAT_INITIAL_MATRIX */ 5101 PetscMPIInt rank; 5102 5103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5104 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5105 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5106 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5107 comm_n = PetscSubcommChild(subcomm); 5108 } 5109 /* flag to destroy *mat_n if not significative */ 5110 if (color) destroy_mat = PETSC_TRUE; 5111 } else { 5112 comm_n = comm; 5113 } 5114 5115 /* prepare send/receive buffers */ 5116 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5117 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5118 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5119 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5120 if (nis) { 5121 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5122 } 5123 5124 /* Get data from local matrices */ 5125 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5126 /* TODO: See below some guidelines on how to prepare the local buffers */ 5127 /* 5128 send_buffer_vals should contain the raw values of the local matrix 5129 send_buffer_idxs should contain: 5130 - MatType_PRIVATE type 5131 - PetscInt size_of_l2gmap 5132 - PetscInt global_row_indices[size_of_l2gmap] 5133 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5134 */ 5135 else { 5136 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5137 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5138 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5139 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5140 send_buffer_idxs[1] = i; 5141 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5142 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5143 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5144 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5145 for (i=0;i<n_sends;i++) { 5146 ilengths_vals[is_indices[i]] = len*len; 5147 ilengths_idxs[is_indices[i]] = len+2; 5148 } 5149 } 5150 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5151 /* additional is (if any) */ 5152 if (nis) { 5153 PetscMPIInt psum; 5154 PetscInt j; 5155 for (j=0,psum=0;j<nis;j++) { 5156 PetscInt plen; 5157 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5158 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5159 psum += len+1; /* indices + lenght */ 5160 } 5161 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5162 for (j=0,psum=0;j<nis;j++) { 5163 PetscInt plen; 5164 const PetscInt *is_array_idxs; 5165 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5166 send_buffer_idxs_is[psum] = plen; 5167 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5168 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5169 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5170 psum += plen+1; /* indices + lenght */ 5171 } 5172 for (i=0;i<n_sends;i++) { 5173 ilengths_idxs_is[is_indices[i]] = psum; 5174 } 5175 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5176 } 5177 5178 buf_size_idxs = 0; 5179 buf_size_vals = 0; 5180 buf_size_idxs_is = 0; 5181 for (i=0;i<n_recvs;i++) { 5182 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5183 buf_size_vals += (PetscInt)olengths_vals[i]; 5184 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5185 } 5186 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5187 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5188 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5189 5190 /* get new tags for clean communications */ 5191 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5192 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5193 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5194 5195 /* allocate for requests */ 5196 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5197 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5198 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5201 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5202 5203 /* communications */ 5204 ptr_idxs = recv_buffer_idxs; 5205 ptr_vals = recv_buffer_vals; 5206 ptr_idxs_is = recv_buffer_idxs_is; 5207 for (i=0;i<n_recvs;i++) { 5208 source_dest = onodes[i]; 5209 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5210 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5211 ptr_idxs += olengths_idxs[i]; 5212 ptr_vals += olengths_vals[i]; 5213 if (nis) { 5214 source_dest = onodes_is[i]; 5215 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5216 ptr_idxs_is += olengths_idxs_is[i]; 5217 } 5218 } 5219 for (i=0;i<n_sends;i++) { 5220 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5221 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5222 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5223 if (nis) { 5224 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5225 } 5226 } 5227 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5228 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5229 5230 /* assemble new l2g map */ 5231 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5232 ptr_idxs = recv_buffer_idxs; 5233 new_local_rows = 0; 5234 for (i=0;i<n_recvs;i++) { 5235 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5236 ptr_idxs += olengths_idxs[i]; 5237 } 5238 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5239 ptr_idxs = recv_buffer_idxs; 5240 new_local_rows = 0; 5241 for (i=0;i<n_recvs;i++) { 5242 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5243 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5244 ptr_idxs += olengths_idxs[i]; 5245 } 5246 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5247 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5248 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5249 5250 /* infer new local matrix type from received local matrices type */ 5251 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5252 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5253 if (n_recvs) { 5254 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5255 ptr_idxs = recv_buffer_idxs; 5256 for (i=0;i<n_recvs;i++) { 5257 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5258 new_local_type_private = MATAIJ_PRIVATE; 5259 break; 5260 } 5261 ptr_idxs += olengths_idxs[i]; 5262 } 5263 switch (new_local_type_private) { 5264 case MATDENSE_PRIVATE: 5265 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5266 new_local_type = MATSEQAIJ; 5267 bs = 1; 5268 } else { /* if I receive only 1 dense matrix */ 5269 new_local_type = MATSEQDENSE; 5270 bs = 1; 5271 } 5272 break; 5273 case MATAIJ_PRIVATE: 5274 new_local_type = MATSEQAIJ; 5275 bs = 1; 5276 break; 5277 case MATBAIJ_PRIVATE: 5278 new_local_type = MATSEQBAIJ; 5279 break; 5280 case MATSBAIJ_PRIVATE: 5281 new_local_type = MATSEQSBAIJ; 5282 break; 5283 default: 5284 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5285 break; 5286 } 5287 } else { /* by default, new_local_type is seqdense */ 5288 new_local_type = MATSEQDENSE; 5289 bs = 1; 5290 } 5291 5292 /* create MATIS object if needed */ 5293 if (!reuse) { 5294 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5295 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5296 } else { 5297 /* it also destroys the local matrices */ 5298 if (*mat_n) { 5299 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5300 } else { /* this is a fake object */ 5301 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5302 } 5303 } 5304 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5305 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5306 5307 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5308 5309 /* Global to local map of received indices */ 5310 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5311 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5312 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5313 5314 /* restore attributes -> type of incoming data and its size */ 5315 buf_size_idxs = 0; 5316 for (i=0;i<n_recvs;i++) { 5317 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5318 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5319 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5320 } 5321 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5322 5323 /* set preallocation */ 5324 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5325 if (!newisdense) { 5326 PetscInt *new_local_nnz=0; 5327 5328 ptr_vals = recv_buffer_vals; 5329 ptr_idxs = recv_buffer_idxs_local; 5330 if (n_recvs) { 5331 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5332 } 5333 for (i=0;i<n_recvs;i++) { 5334 PetscInt j; 5335 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5336 for (j=0;j<*(ptr_idxs+1);j++) { 5337 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5338 } 5339 } else { 5340 /* TODO */ 5341 } 5342 ptr_idxs += olengths_idxs[i]; 5343 } 5344 if (new_local_nnz) { 5345 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5346 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5347 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5348 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5349 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5350 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5351 } else { 5352 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5353 } 5354 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5355 } else { 5356 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5357 } 5358 5359 /* set values */ 5360 ptr_vals = recv_buffer_vals; 5361 ptr_idxs = recv_buffer_idxs_local; 5362 for (i=0;i<n_recvs;i++) { 5363 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5364 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5365 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5366 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5367 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5368 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5369 } else { 5370 /* TODO */ 5371 } 5372 ptr_idxs += olengths_idxs[i]; 5373 ptr_vals += olengths_vals[i]; 5374 } 5375 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5376 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5377 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5378 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5379 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5380 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5381 5382 #if 0 5383 if (!restrict_comm) { /* check */ 5384 Vec lvec,rvec; 5385 PetscReal infty_error; 5386 5387 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5388 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5389 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5390 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5391 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5392 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5393 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5394 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5395 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5396 } 5397 #endif 5398 5399 /* assemble new additional is (if any) */ 5400 if (nis) { 5401 PetscInt **temp_idxs,*count_is,j,psum; 5402 5403 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5404 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5405 ptr_idxs = recv_buffer_idxs_is; 5406 psum = 0; 5407 for (i=0;i<n_recvs;i++) { 5408 for (j=0;j<nis;j++) { 5409 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5410 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5411 psum += plen; 5412 ptr_idxs += plen+1; /* shift pointer to received data */ 5413 } 5414 } 5415 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5416 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5417 for (i=1;i<nis;i++) { 5418 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5419 } 5420 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5421 ptr_idxs = recv_buffer_idxs_is; 5422 for (i=0;i<n_recvs;i++) { 5423 for (j=0;j<nis;j++) { 5424 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5425 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5426 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5427 ptr_idxs += plen+1; /* shift pointer to received data */ 5428 } 5429 } 5430 for (i=0;i<nis;i++) { 5431 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5432 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5433 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5434 } 5435 ierr = PetscFree(count_is);CHKERRQ(ierr); 5436 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5437 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5438 } 5439 /* free workspace */ 5440 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5441 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5442 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5443 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5444 if (isdense) { 5445 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5446 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5447 } else { 5448 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5449 } 5450 if (nis) { 5451 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5452 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5453 } 5454 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5455 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5456 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5457 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5458 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5459 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5460 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5461 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5462 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5463 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5464 ierr = PetscFree(onodes);CHKERRQ(ierr); 5465 if (nis) { 5466 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5467 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5468 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5469 } 5470 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5471 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5472 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5473 for (i=0;i<nis;i++) { 5474 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5475 } 5476 *mat_n = NULL; 5477 } 5478 PetscFunctionReturn(0); 5479 } 5480 5481 /* temporary hack into ksp private data structure */ 5482 #include <petsc/private/kspimpl.h> 5483 5484 #undef __FUNCT__ 5485 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5486 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5487 { 5488 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5489 PC_IS *pcis = (PC_IS*)pc->data; 5490 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5491 MatNullSpace CoarseNullSpace=NULL; 5492 ISLocalToGlobalMapping coarse_islg; 5493 IS coarse_is,*isarray; 5494 PetscInt i,im_active=-1,active_procs=-1; 5495 PetscInt nis,nisdofs,nisneu,nisvert; 5496 PC pc_temp; 5497 PCType coarse_pc_type; 5498 KSPType coarse_ksp_type; 5499 PetscBool multilevel_requested,multilevel_allowed; 5500 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5501 Mat t_coarse_mat_is; 5502 PetscInt ncoarse; 5503 PetscBool compute_vecs = PETSC_FALSE; 5504 PetscScalar *array; 5505 MatReuse coarse_mat_reuse; 5506 PetscBool restr, full_restr, have_void; 5507 PetscErrorCode ierr; 5508 5509 PetscFunctionBegin; 5510 /* Assign global numbering to coarse dofs */ 5511 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5512 PetscInt ocoarse_size; 5513 compute_vecs = PETSC_TRUE; 5514 ocoarse_size = pcbddc->coarse_size; 5515 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5516 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5517 /* see if we can avoid some work */ 5518 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5519 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5520 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5521 PC pc; 5522 PetscBool isbddc; 5523 5524 /* temporary workaround since PCBDDC does not have a reset method so far */ 5525 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5526 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5527 if (isbddc) { 5528 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5529 } else { 5530 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5531 } 5532 coarse_reuse = PETSC_FALSE; 5533 } else { /* we can safely reuse already computed coarse matrix */ 5534 coarse_reuse = PETSC_TRUE; 5535 } 5536 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5537 coarse_reuse = PETSC_FALSE; 5538 } 5539 /* reset any subassembling information */ 5540 if (!coarse_reuse || pcbddc->recompute_topography) { 5541 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5542 } 5543 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5544 coarse_reuse = PETSC_TRUE; 5545 } 5546 /* assemble coarse matrix */ 5547 if (coarse_reuse && pcbddc->coarse_ksp) { 5548 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5549 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5550 coarse_mat_reuse = MAT_REUSE_MATRIX; 5551 } else { 5552 coarse_mat = NULL; 5553 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5554 } 5555 5556 /* creates temporary l2gmap and IS for coarse indexes */ 5557 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5558 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5559 5560 /* creates temporary MATIS object for coarse matrix */ 5561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5562 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5563 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5564 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5565 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5566 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5567 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5568 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5569 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5570 5571 /* count "active" (i.e. with positive local size) and "void" processes */ 5572 im_active = !!(pcis->n); 5573 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5574 5575 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5576 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5577 /* full_restr : just use the receivers from the subassembling pattern */ 5578 coarse_mat_is = NULL; 5579 multilevel_allowed = PETSC_FALSE; 5580 multilevel_requested = PETSC_FALSE; 5581 full_restr = PETSC_TRUE; 5582 pcbddc->coarse_eqs_per_proc = PetscMin(pcbddc->coarse_size,pcbddc->coarse_eqs_per_proc); 5583 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5584 if (multilevel_requested) { 5585 ncoarse = active_procs/pcbddc->coarsening_ratio; 5586 restr = PETSC_FALSE; 5587 full_restr = PETSC_FALSE; 5588 } else { 5589 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5590 restr = PETSC_TRUE; 5591 full_restr = PETSC_TRUE; 5592 } 5593 ncoarse = PetscMax(1,ncoarse); 5594 if (!pcbddc->coarse_subassembling) { 5595 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5596 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5597 PetscInt psum; 5598 PetscMPIInt size; 5599 if (pcbddc->coarse_ksp) psum = 1; 5600 else psum = 0; 5601 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5602 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5603 if (ncoarse < size) have_void = PETSC_TRUE; 5604 } 5605 /* determine if we can go multilevel */ 5606 if (multilevel_requested) { 5607 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5608 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5609 } 5610 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5611 5612 /* dump subassembling pattern */ 5613 if (pcbddc->dbg_flag && multilevel_allowed) { 5614 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5615 } 5616 5617 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5618 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5619 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5620 const PetscInt *idxs; 5621 ISLocalToGlobalMapping tmap; 5622 5623 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5624 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5625 /* allocate space for temporary storage */ 5626 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5627 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5628 /* allocate for IS array */ 5629 nisdofs = pcbddc->n_ISForDofsLocal; 5630 nisneu = !!pcbddc->NeumannBoundariesLocal; 5631 nisvert = 0; /* nisvert is not used */ 5632 nis = nisdofs + nisneu + nisvert; 5633 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5634 /* dofs splitting */ 5635 for (i=0;i<nisdofs;i++) { 5636 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5637 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5638 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5639 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5640 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5641 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5642 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5643 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5644 } 5645 /* neumann boundaries */ 5646 if (pcbddc->NeumannBoundariesLocal) { 5647 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5648 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5649 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5650 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5651 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5652 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5653 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5654 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5655 } 5656 /* free memory */ 5657 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5658 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5659 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5660 } else { 5661 nis = 0; 5662 nisdofs = 0; 5663 nisneu = 0; 5664 nisvert = 0; 5665 isarray = NULL; 5666 } 5667 /* destroy no longer needed map */ 5668 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5669 5670 /* subassemble */ 5671 if (multilevel_allowed) { 5672 PetscBool reuse,reuser; 5673 if (coarse_mat) reuse = PETSC_TRUE; 5674 else reuse = PETSC_FALSE; 5675 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5676 if (reuser) { 5677 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray);CHKERRQ(ierr); 5678 } else { 5679 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5680 } 5681 } else { 5682 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5683 } 5684 if (coarse_mat_is || coarse_mat) { 5685 PetscMPIInt size; 5686 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5687 if (!multilevel_allowed) { 5688 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5689 } else { 5690 Mat A; 5691 5692 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5693 if (coarse_mat_is) { 5694 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5695 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5696 coarse_mat = coarse_mat_is; 5697 } 5698 /* be sure we don't have MatSeqDENSE as local mat */ 5699 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5700 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5701 } 5702 } 5703 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5704 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5705 5706 /* create local to global scatters for coarse problem */ 5707 if (compute_vecs) { 5708 PetscInt lrows; 5709 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5710 if (coarse_mat) { 5711 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5712 } else { 5713 lrows = 0; 5714 } 5715 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5716 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5717 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5718 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5719 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5720 } 5721 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5722 5723 /* set defaults for coarse KSP and PC */ 5724 if (multilevel_allowed) { 5725 coarse_ksp_type = KSPRICHARDSON; 5726 coarse_pc_type = PCBDDC; 5727 } else { 5728 coarse_ksp_type = KSPPREONLY; 5729 coarse_pc_type = PCREDUNDANT; 5730 } 5731 5732 /* print some info if requested */ 5733 if (pcbddc->dbg_flag) { 5734 if (!multilevel_allowed) { 5735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5736 if (multilevel_requested) { 5737 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5738 } else if (pcbddc->max_levels) { 5739 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5740 } 5741 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5742 } 5743 } 5744 5745 /* create the coarse KSP object only once with defaults */ 5746 if (coarse_mat) { 5747 PetscViewer dbg_viewer = NULL; 5748 if (pcbddc->dbg_flag) { 5749 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5750 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5751 } 5752 if (!pcbddc->coarse_ksp) { 5753 char prefix[256],str_level[16]; 5754 size_t len; 5755 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5756 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5757 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5758 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5759 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5760 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5761 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5762 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5763 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5764 /* prefix */ 5765 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5766 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5767 if (!pcbddc->current_level) { 5768 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5769 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5770 } else { 5771 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5772 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5773 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5774 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5775 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5776 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5777 } 5778 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5779 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5780 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5781 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5782 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5783 /* allow user customization */ 5784 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5785 } 5786 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5787 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5788 if (nisdofs) { 5789 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5790 for (i=0;i<nisdofs;i++) { 5791 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5792 } 5793 } 5794 if (nisneu) { 5795 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5796 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5797 } 5798 if (nisvert) { 5799 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5800 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5801 } 5802 5803 /* get some info after set from options */ 5804 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5805 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5806 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5807 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5808 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5809 isbddc = PETSC_FALSE; 5810 } 5811 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5812 if (isredundant) { 5813 KSP inner_ksp; 5814 PC inner_pc; 5815 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5816 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5817 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5818 } 5819 5820 /* parameters which miss an API */ 5821 if (isbddc) { 5822 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 5823 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 5824 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 5825 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 5826 if (pcbddc_coarse->benign_saddle_point) { 5827 pcbddc_coarse->benign_compute_nonetflux = PETSC_TRUE; 5828 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 5829 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 5830 } 5831 } 5832 5833 /* propagate symmetry info of coarse matrix */ 5834 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5835 if (pc->pmat->symmetric_set) { 5836 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5837 } 5838 if (pc->pmat->hermitian_set) { 5839 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5840 } 5841 if (pc->pmat->spd_set) { 5842 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5843 } 5844 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 5845 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 5846 } 5847 /* set operators */ 5848 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5849 if (pcbddc->dbg_flag) { 5850 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5851 } 5852 } 5853 ierr = PetscFree(isarray);CHKERRQ(ierr); 5854 #if 0 5855 { 5856 PetscViewer viewer; 5857 char filename[256]; 5858 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5859 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5860 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5861 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5862 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 5863 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5864 } 5865 #endif 5866 5867 /* Compute coarse null space (special handling by BDDC only) */ 5868 #if 0 5869 if (pcbddc->NullSpace) { 5870 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5871 } 5872 #endif 5873 /* hack */ 5874 if (pcbddc->coarse_ksp) { 5875 Vec crhs,csol; 5876 5877 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5878 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5879 if (!csol) { 5880 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5881 } 5882 if (!crhs) { 5883 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5884 } 5885 } 5886 5887 /* compute null space for coarse solver if the benign trick has been requested */ 5888 if (pcbddc->benign_null) { 5889 5890 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5891 for (i=0;i<pcbddc->benign_n;i++) { 5892 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5893 } 5894 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5895 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5896 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5897 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5898 if (coarse_mat) { 5899 Vec nullv; 5900 PetscScalar *array,*array2; 5901 PetscInt nl; 5902 5903 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5904 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5905 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5906 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5907 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5908 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5909 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5910 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5911 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5912 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5913 } 5914 } 5915 5916 if (pcbddc->coarse_ksp) { 5917 PetscBool ispreonly; 5918 5919 if (CoarseNullSpace) { 5920 PetscBool isnull; 5921 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5922 if (0) { 5923 if (isbddc && !pcbddc->benign_saddle_point) { 5924 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5925 } else { 5926 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5927 } 5928 } else { 5929 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5930 } 5931 } 5932 /* setup coarse ksp */ 5933 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5934 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5935 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5936 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5937 KSP check_ksp; 5938 KSPType check_ksp_type; 5939 PC check_pc; 5940 Vec check_vec,coarse_vec; 5941 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5942 PetscInt its; 5943 PetscBool compute_eigs; 5944 PetscReal *eigs_r,*eigs_c; 5945 PetscInt neigs; 5946 const char *prefix; 5947 5948 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5949 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5950 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5951 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5952 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5953 /* prevent from setup unneeded object */ 5954 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 5955 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 5956 if (ispreonly) { 5957 check_ksp_type = KSPPREONLY; 5958 compute_eigs = PETSC_FALSE; 5959 } else { 5960 check_ksp_type = KSPGMRES; 5961 compute_eigs = PETSC_TRUE; 5962 } 5963 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5964 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5965 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5966 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5967 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5968 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5969 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5970 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5971 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5972 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5973 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5974 /* create random vec */ 5975 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5976 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5977 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5978 if (CoarseNullSpace) { 5979 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5980 } 5981 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5982 /* solve coarse problem */ 5983 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5984 if (CoarseNullSpace) { 5985 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5986 } 5987 /* set eigenvalue estimation if preonly has not been requested */ 5988 if (compute_eigs) { 5989 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5990 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5991 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5992 lambda_max = eigs_r[neigs-1]; 5993 lambda_min = eigs_r[0]; 5994 if (pcbddc->use_coarse_estimates) { 5995 if (lambda_max>lambda_min) { 5996 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5997 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5998 } 5999 } 6000 } 6001 6002 /* check coarse problem residual error */ 6003 if (pcbddc->dbg_flag) { 6004 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6005 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6006 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6007 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6008 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6009 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6010 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6011 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6012 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6013 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6014 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6015 if (CoarseNullSpace) { 6016 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6017 } 6018 if (compute_eigs) { 6019 PetscReal lambda_max_s,lambda_min_s; 6020 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6021 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6022 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6023 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6024 for (i=0;i<neigs;i++) { 6025 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6026 } 6027 } 6028 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6029 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6030 } 6031 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6032 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6033 if (compute_eigs) { 6034 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6035 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6036 } 6037 } 6038 } 6039 /* print additional info */ 6040 if (pcbddc->dbg_flag) { 6041 /* waits until all processes reaches this point */ 6042 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6045 } 6046 6047 /* free memory */ 6048 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6049 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6050 PetscFunctionReturn(0); 6051 } 6052 6053 #undef __FUNCT__ 6054 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6055 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6056 { 6057 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6058 PC_IS* pcis = (PC_IS*)pc->data; 6059 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6060 IS subset,subset_mult,subset_n; 6061 PetscInt local_size,coarse_size=0; 6062 PetscInt *local_primal_indices=NULL; 6063 const PetscInt *t_local_primal_indices; 6064 PetscErrorCode ierr; 6065 6066 PetscFunctionBegin; 6067 /* Compute global number of coarse dofs */ 6068 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6070 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6071 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6073 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6074 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6075 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6076 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6077 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6078 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6079 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6080 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6081 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6082 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6083 6084 /* check numbering */ 6085 if (pcbddc->dbg_flag) { 6086 PetscScalar coarsesum,*array,*array2; 6087 PetscInt i; 6088 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6089 6090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6093 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6094 /* counter */ 6095 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6096 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6097 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6098 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6099 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6100 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6101 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6102 for (i=0;i<pcbddc->local_primal_size;i++) { 6103 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6104 } 6105 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6106 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6107 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6108 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6109 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6110 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6111 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6112 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6113 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6114 for (i=0;i<pcis->n;i++) { 6115 if (array[i] != 0.0 && array[i] != array2[i]) { 6116 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6117 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6118 set_error = PETSC_TRUE; 6119 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6121 } 6122 } 6123 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6124 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6125 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6126 for (i=0;i<pcis->n;i++) { 6127 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6128 } 6129 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6130 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6131 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6132 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6133 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6134 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6135 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6136 PetscInt *gidxs; 6137 6138 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6139 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6140 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6141 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6143 for (i=0;i<pcbddc->local_primal_size;i++) { 6144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6145 } 6146 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6147 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6148 } 6149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6150 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6151 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6152 } 6153 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6154 /* get back data */ 6155 *coarse_size_n = coarse_size; 6156 *local_primal_indices_n = local_primal_indices; 6157 PetscFunctionReturn(0); 6158 } 6159 6160 #undef __FUNCT__ 6161 #define __FUNCT__ "PCBDDCGlobalToLocal" 6162 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6163 { 6164 IS localis_t; 6165 PetscInt i,lsize,*idxs,n; 6166 PetscScalar *vals; 6167 PetscErrorCode ierr; 6168 6169 PetscFunctionBegin; 6170 /* get indices in local ordering exploiting local to global map */ 6171 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6172 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6173 for (i=0;i<lsize;i++) vals[i] = 1.0; 6174 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6175 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6176 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6177 if (idxs) { /* multilevel guard */ 6178 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6179 } 6180 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6181 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6182 ierr = PetscFree(vals);CHKERRQ(ierr); 6183 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6184 /* now compute set in local ordering */ 6185 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6186 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6187 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6188 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6189 for (i=0,lsize=0;i<n;i++) { 6190 if (PetscRealPart(vals[i]) > 0.5) { 6191 lsize++; 6192 } 6193 } 6194 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6195 for (i=0,lsize=0;i<n;i++) { 6196 if (PetscRealPart(vals[i]) > 0.5) { 6197 idxs[lsize++] = i; 6198 } 6199 } 6200 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6201 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6202 *localis = localis_t; 6203 PetscFunctionReturn(0); 6204 } 6205 6206 #undef __FUNCT__ 6207 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6208 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6209 { 6210 PC_IS *pcis=(PC_IS*)pc->data; 6211 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6212 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6213 Mat S_j; 6214 PetscInt *used_xadj,*used_adjncy; 6215 PetscBool free_used_adj; 6216 PetscErrorCode ierr; 6217 6218 PetscFunctionBegin; 6219 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6220 free_used_adj = PETSC_FALSE; 6221 if (pcbddc->sub_schurs_layers == -1) { 6222 used_xadj = NULL; 6223 used_adjncy = NULL; 6224 } else { 6225 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6226 used_xadj = pcbddc->mat_graph->xadj; 6227 used_adjncy = pcbddc->mat_graph->adjncy; 6228 } else if (pcbddc->computed_rowadj) { 6229 used_xadj = pcbddc->mat_graph->xadj; 6230 used_adjncy = pcbddc->mat_graph->adjncy; 6231 } else { 6232 PetscBool flg_row=PETSC_FALSE; 6233 const PetscInt *xadj,*adjncy; 6234 PetscInt nvtxs; 6235 6236 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6237 if (flg_row) { 6238 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6239 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6240 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6241 free_used_adj = PETSC_TRUE; 6242 } else { 6243 pcbddc->sub_schurs_layers = -1; 6244 used_xadj = NULL; 6245 used_adjncy = NULL; 6246 } 6247 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6248 } 6249 } 6250 6251 /* setup sub_schurs data */ 6252 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6253 if (!sub_schurs->schur_explicit) { 6254 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6255 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6256 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6257 } else { 6258 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6259 PetscBool isseqaij,need_change = PETSC_FALSE;; 6260 PetscInt benign_n; 6261 Mat change = NULL; 6262 Vec scaling = NULL; 6263 IS change_primal = NULL; 6264 6265 if (!pcbddc->use_vertices && reuse_solvers) { 6266 PetscInt n_vertices; 6267 6268 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6269 reuse_solvers = (PetscBool)!n_vertices; 6270 } 6271 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6272 if (!isseqaij) { 6273 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6274 if (matis->A == pcbddc->local_mat) { 6275 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6276 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6277 } else { 6278 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6279 } 6280 } 6281 if (!pcbddc->benign_change_explicit) { 6282 benign_n = pcbddc->benign_n; 6283 } else { 6284 benign_n = 0; 6285 } 6286 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6287 We need a global reduction to avoid possible deadlocks. 6288 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6289 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6290 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6291 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6292 need_change = (PetscBool)(!need_change); 6293 } 6294 /* If the user defines additional constraints, we import them here. 6295 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6296 if (need_change) { 6297 PC_IS *pcisf; 6298 PC_BDDC *pcbddcf; 6299 PC pcf; 6300 6301 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6302 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6303 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6304 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6305 /* hacks */ 6306 pcisf = (PC_IS*)pcf->data; 6307 pcisf->is_B_local = pcis->is_B_local; 6308 pcisf->vec1_N = pcis->vec1_N; 6309 pcisf->BtoNmap = pcis->BtoNmap; 6310 pcisf->n = pcis->n; 6311 pcisf->n_B = pcis->n_B; 6312 pcbddcf = (PC_BDDC*)pcf->data; 6313 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6314 pcbddcf->mat_graph = pcbddc->mat_graph; 6315 pcbddcf->use_faces = PETSC_TRUE; 6316 pcbddcf->use_change_of_basis = PETSC_TRUE; 6317 pcbddcf->use_change_on_faces = PETSC_TRUE; 6318 pcbddcf->use_qr_single = PETSC_TRUE; 6319 pcbddcf->fake_change = PETSC_TRUE; 6320 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6321 /* store information on primal vertices and change of basis (in local numbering) */ 6322 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6323 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6324 change = pcbddcf->ConstraintMatrix; 6325 pcbddcf->ConstraintMatrix = NULL; 6326 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6327 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6328 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6329 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6330 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6331 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6332 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6333 pcf->ops->destroy = NULL; 6334 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6335 } 6336 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6337 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6338 ierr = MatDestroy(&change);CHKERRQ(ierr); 6339 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6340 } 6341 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6342 6343 /* free adjacency */ 6344 if (free_used_adj) { 6345 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6346 } 6347 PetscFunctionReturn(0); 6348 } 6349 6350 #undef __FUNCT__ 6351 #define __FUNCT__ "PCBDDCInitSubSchurs" 6352 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6353 { 6354 PC_IS *pcis=(PC_IS*)pc->data; 6355 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6356 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6357 PCBDDCGraph graph; 6358 PetscErrorCode ierr; 6359 6360 PetscFunctionBegin; 6361 /* attach interface graph for determining subsets */ 6362 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6363 IS verticesIS,verticescomm; 6364 PetscInt vsize,*idxs; 6365 6366 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6367 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6368 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6369 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6370 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6371 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6372 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6373 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6374 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6375 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6376 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6377 } else { 6378 graph = pcbddc->mat_graph; 6379 } 6380 /* print some info */ 6381 if (pcbddc->dbg_flag) { 6382 IS vertices; 6383 PetscInt nv,nedges,nfaces; 6384 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6385 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6386 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6387 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6388 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6394 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6395 } 6396 6397 /* sub_schurs init */ 6398 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6399 6400 /* free graph struct */ 6401 if (pcbddc->sub_schurs_rebuild) { 6402 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6403 } 6404 PetscFunctionReturn(0); 6405 } 6406 6407 #undef __FUNCT__ 6408 #define __FUNCT__ "PCBDDCCheckOperator" 6409 PetscErrorCode PCBDDCCheckOperator(PC pc) 6410 { 6411 PC_IS *pcis=(PC_IS*)pc->data; 6412 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6413 PetscErrorCode ierr; 6414 6415 PetscFunctionBegin; 6416 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6417 IS zerodiag = NULL; 6418 Mat S_j,B0_B=NULL; 6419 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6420 PetscScalar *p0_check,*array,*array2; 6421 PetscReal norm; 6422 PetscInt i; 6423 6424 /* B0 and B0_B */ 6425 if (zerodiag) { 6426 IS dummy; 6427 6428 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6429 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6430 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6431 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6432 } 6433 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6434 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6435 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6436 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6437 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6438 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6439 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6440 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6441 /* S_j */ 6442 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6443 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6444 6445 /* mimic vector in \widetilde{W}_\Gamma */ 6446 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6447 /* continuous in primal space */ 6448 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6449 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6450 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6451 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6452 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6453 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6454 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6455 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6456 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6457 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6458 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6459 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6460 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6461 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6462 6463 /* assemble rhs for coarse problem */ 6464 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6465 /* local with Schur */ 6466 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6467 if (zerodiag) { 6468 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6469 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6470 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6471 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6472 } 6473 /* sum on primal nodes the local contributions */ 6474 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6475 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6476 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6477 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6478 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6479 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6480 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6481 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6482 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6483 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6484 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6485 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6486 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6487 /* scale primal nodes (BDDC sums contibutions) */ 6488 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6489 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6490 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6491 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6492 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6493 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6494 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6495 /* global: \widetilde{B0}_B w_\Gamma */ 6496 if (zerodiag) { 6497 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6498 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6499 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6500 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6501 } 6502 /* BDDC */ 6503 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6504 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6505 6506 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6507 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6508 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6509 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6510 for (i=0;i<pcbddc->benign_n;i++) { 6511 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6512 } 6513 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6514 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6515 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6516 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6517 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6518 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6519 } 6520 PetscFunctionReturn(0); 6521 } 6522